diff --git a/model.safetensors b/model.safetensors index ba330b87bda5bf85ac12a222f1540ee33bc064d6..87cdb3217aa5c39882779809f1c96596fd16c1b4 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d4e98bc19214e703abd204c0ffebd30d38df81e5c2e8f315a5706737f9081bfb +oid sha256:dfa9c57c1a422417816b316606c2d878a3bcb3f7c6d7607c8f81010ace74cee3 size 94763496 diff --git a/run-13/checkpoint-144/config.json b/run-13/checkpoint-144/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-13/checkpoint-144/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-13/checkpoint-144/model.safetensors b/run-13/checkpoint-144/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6dbb5ac2a22fc7e750d4525943c87bea66451f7e --- /dev/null +++ b/run-13/checkpoint-144/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3876c6c1df2b682d9bf903a731bf1f13fc85ae1801e9e6f5c4bca95546bf699 +size 94763496 diff --git a/run-13/checkpoint-144/optimizer.pt b/run-13/checkpoint-144/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..26486d0b133268a8aa0444326c6a9c9a8b5bcedf --- /dev/null +++ b/run-13/checkpoint-144/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd7e97044eeee28229692efe53fb1816326bd164c3a15e2c78dfa1fe22d19388 +size 189552570 diff --git a/run-13/checkpoint-144/preprocessor_config.json b/run-13/checkpoint-144/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-13/checkpoint-144/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-13/checkpoint-144/rng_state.pth b/run-13/checkpoint-144/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..35674c44088a3585f8cd11a1eb144d356856a804 --- /dev/null +++ b/run-13/checkpoint-144/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b8bdd7b4355fd23f0b8256efb0158e4240e11263e992a13d50944c37692500 +size 14244 diff --git a/run-13/checkpoint-144/scheduler.pt b/run-13/checkpoint-144/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..495e897e6c117ab93f27451222e10c207d1ad3c2 --- /dev/null +++ b/run-13/checkpoint-144/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9cdbc15afa9058e3779fdb5b5182ac793f6da43b2bd229a164f2a22e5982ae +size 1064 diff --git a/run-13/checkpoint-144/trainer_state.json b/run-13/checkpoint-144/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cbf3ece0d9628a89bf929f0245c3561d72797e76 --- /dev/null +++ b/run-13/checkpoint-144/trainer_state.json @@ -0,0 +1,247 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-13/checkpoint-48", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 144, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2512309551239014, + "learning_rate": 1.034188035217011e-05, + "loss": 0.7013, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.0938308238983154, + "learning_rate": 2.068376070434022e-05, + "loss": 0.6709, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.029103398323059, + "learning_rate": 3.102564105651033e-05, + "loss": 0.676, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.7623715400695801, + "learning_rate": 4.136752140868044e-05, + "loss": 0.6371, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.6755096912384033, + "learning_rate": 5.170940176085055e-05, + "loss": 0.6148, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.48364195227622986, + "learning_rate": 6.205128211302065e-05, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 3.724107503890991, + "learning_rate": 7.239316246519077e-05, + "loss": 0.5257, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.4992948770523071, + "learning_rate": 8.273504281736088e-05, + "loss": 0.7212, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 4.220996856689453, + "learning_rate": 9.307692316953099e-05, + "loss": 0.6503, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7372970581054688, + "eval_runtime": 1.384, + "eval_samples_per_second": 46.242, + "eval_steps_per_second": 5.78, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.6990180015563965, + "learning_rate": 9.882241225406994e-05, + "loss": 0.6895, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 1.5386626720428467, + "learning_rate": 9.767331443716214e-05, + "loss": 0.6802, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 1.7925231456756592, + "learning_rate": 9.652421662025435e-05, + "loss": 0.4231, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 1.0101344585418701, + "learning_rate": 9.560493836672811e-05, + "loss": 0.5768, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.2472072839736938, + "learning_rate": 9.445584054982033e-05, + "loss": 0.5095, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": 3.8222692012786865, + "learning_rate": 9.330674273291253e-05, + "loss": 0.8618, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.367830276489258, + "learning_rate": 9.215764491600475e-05, + "loss": 0.6321, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.2242908477783203, + "learning_rate": 9.100854709909696e-05, + "loss": 0.6086, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.1424415111541748, + "learning_rate": 8.985944928218916e-05, + "loss": 0.6172, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 1.9207895994186401, + "learning_rate": 8.871035146528138e-05, + "loss": 0.6318, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6706695556640625, + "eval_runtime": 1.3744, + "eval_samples_per_second": 46.567, + "eval_steps_per_second": 5.821, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.3165736198425293, + "learning_rate": 8.756125364837358e-05, + "loss": 0.6359, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.2420493364334106, + "learning_rate": 8.64121558314658e-05, + "loss": 0.5142, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.2290922403335571, + "learning_rate": 8.526305801455801e-05, + "loss": 0.643, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.1532477140426636, + "learning_rate": 8.434377976103179e-05, + "loss": 0.6774, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 6.360584259033203, + "learning_rate": 8.319468194412399e-05, + "loss": 0.5839, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 7.5169677734375, + "learning_rate": 8.204558412721619e-05, + "loss": 0.5138, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 14.515143394470215, + "learning_rate": 8.089648631030841e-05, + "loss": 0.6207, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.682206153869629, + "learning_rate": 7.974738849340061e-05, + "loss": 0.4247, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 5.204345226287842, + "learning_rate": 7.88281102398744e-05, + "loss": 0.3698, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.9202766418457031, + "eval_runtime": 1.4182, + "eval_samples_per_second": 45.129, + "eval_steps_per_second": 5.641, + "step": 144 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2121874430755872.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 9.928205138083305e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-13/checkpoint-144/training_args.bin b/run-13/checkpoint-144/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e56ba67a8f472c421da9b268a6a057d0272d2b2 --- /dev/null +++ b/run-13/checkpoint-144/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6181d77c6ec1f535fee0fbb436cf5e7d6cbe4ec68fcd3368e6e1056eece160ff +size 4920 diff --git a/run-13/checkpoint-192/config.json b/run-13/checkpoint-192/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-13/checkpoint-192/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-13/checkpoint-192/model.safetensors b/run-13/checkpoint-192/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98f7ebb61fc7f419df9fa0a683350df3401162e0 --- /dev/null +++ b/run-13/checkpoint-192/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3262e5adad1b2fffb615ffc1fb15d11f7d2eb9a344b05ff26e79afd8d8379e71 +size 94763496 diff --git a/run-13/checkpoint-192/optimizer.pt b/run-13/checkpoint-192/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b84848b6f7bc7e56f046601797c13114ab7b4da7 --- /dev/null +++ b/run-13/checkpoint-192/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df96c8c3ac0bd44f961e18982f85baa470a90ac9079a906e6858e0085e01ae40 +size 189552570 diff --git a/run-13/checkpoint-192/preprocessor_config.json b/run-13/checkpoint-192/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-13/checkpoint-192/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-13/checkpoint-192/rng_state.pth b/run-13/checkpoint-192/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d76370ee36700e6a498a1fbfff621aca7984a77 --- /dev/null +++ b/run-13/checkpoint-192/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5aef020ca2df517540ac9ff4e195e1c41a7b85939e93195d118078f119bc949 +size 14244 diff --git a/run-13/checkpoint-192/scheduler.pt b/run-13/checkpoint-192/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2453b6b9021c66365a2f7b77f84f4fc29a824aa5 --- /dev/null +++ b/run-13/checkpoint-192/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b32d127c3fc208eaf13706655c4c8adef63953c167a18e059a63ae70d423407 +size 1064 diff --git a/run-13/checkpoint-192/trainer_state.json b/run-13/checkpoint-192/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..528af0a4d14ade5156945b3ab5cca3bf4eb1f26e --- /dev/null +++ b/run-13/checkpoint-192/trainer_state.json @@ -0,0 +1,326 @@ +{ + "best_metric": 0.7474747474747475, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-13/checkpoint-192", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 192, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2512309551239014, + "learning_rate": 1.034188035217011e-05, + "loss": 0.7013, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.0938308238983154, + "learning_rate": 2.068376070434022e-05, + "loss": 0.6709, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.029103398323059, + "learning_rate": 3.102564105651033e-05, + "loss": 0.676, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.7623715400695801, + "learning_rate": 4.136752140868044e-05, + "loss": 0.6371, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.6755096912384033, + "learning_rate": 5.170940176085055e-05, + "loss": 0.6148, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.48364195227622986, + "learning_rate": 6.205128211302065e-05, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 3.724107503890991, + "learning_rate": 7.239316246519077e-05, + "loss": 0.5257, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.4992948770523071, + "learning_rate": 8.273504281736088e-05, + "loss": 0.7212, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 4.220996856689453, + "learning_rate": 9.307692316953099e-05, + "loss": 0.6503, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7372970581054688, + "eval_runtime": 1.384, + "eval_samples_per_second": 46.242, + "eval_steps_per_second": 5.78, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.6990180015563965, + "learning_rate": 9.882241225406994e-05, + "loss": 0.6895, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 1.5386626720428467, + "learning_rate": 9.767331443716214e-05, + "loss": 0.6802, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 1.7925231456756592, + "learning_rate": 9.652421662025435e-05, + "loss": 0.4231, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 1.0101344585418701, + "learning_rate": 9.560493836672811e-05, + "loss": 0.5768, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.2472072839736938, + "learning_rate": 9.445584054982033e-05, + "loss": 0.5095, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": 3.8222692012786865, + "learning_rate": 9.330674273291253e-05, + "loss": 0.8618, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.367830276489258, + "learning_rate": 9.215764491600475e-05, + "loss": 0.6321, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.2242908477783203, + "learning_rate": 9.100854709909696e-05, + "loss": 0.6086, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.1424415111541748, + "learning_rate": 8.985944928218916e-05, + "loss": 0.6172, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 1.9207895994186401, + "learning_rate": 8.871035146528138e-05, + "loss": 0.6318, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6706695556640625, + "eval_runtime": 1.3744, + "eval_samples_per_second": 46.567, + "eval_steps_per_second": 5.821, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.3165736198425293, + "learning_rate": 8.756125364837358e-05, + "loss": 0.6359, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.2420493364334106, + "learning_rate": 8.64121558314658e-05, + "loss": 0.5142, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.2290922403335571, + "learning_rate": 8.526305801455801e-05, + "loss": 0.643, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.1532477140426636, + "learning_rate": 8.434377976103179e-05, + "loss": 0.6774, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 6.360584259033203, + "learning_rate": 8.319468194412399e-05, + "loss": 0.5839, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 7.5169677734375, + "learning_rate": 8.204558412721619e-05, + "loss": 0.5138, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 14.515143394470215, + "learning_rate": 8.089648631030841e-05, + "loss": 0.6207, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.682206153869629, + "learning_rate": 7.974738849340061e-05, + "loss": 0.4247, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 5.204345226287842, + "learning_rate": 7.88281102398744e-05, + "loss": 0.3698, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.9202766418457031, + "eval_runtime": 1.4182, + "eval_samples_per_second": 45.129, + "eval_steps_per_second": 5.641, + "step": 144 + }, + { + "epoch": 3.02, + "grad_norm": 6.481989860534668, + "learning_rate": 7.76790124229666e-05, + "loss": 0.6196, + "step": 145 + }, + { + "epoch": 3.12, + "grad_norm": Infinity, + "learning_rate": 7.675973416944037e-05, + "loss": 0.4192, + "step": 150 + }, + { + "epoch": 3.23, + "grad_norm": 2.9529616832733154, + "learning_rate": 7.561063635253257e-05, + "loss": 0.2996, + "step": 155 + }, + { + "epoch": 3.33, + "grad_norm": 13.315085411071777, + "learning_rate": 7.469135809900635e-05, + "loss": 0.6014, + "step": 160 + }, + { + "epoch": 3.44, + "grad_norm": 3.830242156982422, + "learning_rate": 7.354226028209855e-05, + "loss": 0.2117, + "step": 165 + }, + { + "epoch": 3.54, + "grad_norm": 26.10947608947754, + "learning_rate": 7.239316246519077e-05, + "loss": 0.2918, + "step": 170 + }, + { + "epoch": 3.65, + "grad_norm": 55.73325729370117, + "learning_rate": 7.124406464828297e-05, + "loss": 0.775, + "step": 175 + }, + { + "epoch": 3.75, + "grad_norm": 1.9401394128799438, + "learning_rate": 7.009496683137518e-05, + "loss": 0.4163, + "step": 180 + }, + { + "epoch": 3.85, + "grad_norm": 0.818503201007843, + "learning_rate": 6.89458690144674e-05, + "loss": 0.3559, + "step": 185 + }, + { + "epoch": 3.96, + "grad_norm": 6.375278472900391, + "learning_rate": 6.77967711975596e-05, + "loss": 0.5548, + "step": 190 + }, + { + "epoch": 4.0, + "eval_f1": 0.7474747474747475, + "eval_loss": 1.170898675918579, + "eval_runtime": 1.3939, + "eval_samples_per_second": 45.913, + "eval_steps_per_second": 5.739, + "step": 192 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2891755054954176.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 9.928205138083305e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-13/checkpoint-192/training_args.bin b/run-13/checkpoint-192/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e56ba67a8f472c421da9b268a6a057d0272d2b2 --- /dev/null +++ b/run-13/checkpoint-192/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6181d77c6ec1f535fee0fbb436cf5e7d6cbe4ec68fcd3368e6e1056eece160ff +size 4920 diff --git a/run-13/checkpoint-96/config.json b/run-13/checkpoint-96/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-13/checkpoint-96/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-13/checkpoint-96/model.safetensors b/run-13/checkpoint-96/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45423359c4f882c51293d9054e27f4b41aa87bee --- /dev/null +++ b/run-13/checkpoint-96/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c12b8d2b75a071bf5e832285aa494f6c0b5a6337c618f408c9b1b0619da5aaca +size 94763496 diff --git a/run-13/checkpoint-96/optimizer.pt b/run-13/checkpoint-96/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f5f9e36908c07ca31308a0359a0c2b85520c83f --- /dev/null +++ b/run-13/checkpoint-96/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27693d8a54d2c8c8ccf19cc1c96d45d895fda3a5dd6ee169ec6503d7194cbd67 +size 189552570 diff --git a/run-13/checkpoint-96/preprocessor_config.json b/run-13/checkpoint-96/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-13/checkpoint-96/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-13/checkpoint-96/rng_state.pth b/run-13/checkpoint-96/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5df9532d48eec28233ca1958234673b2505309f1 --- /dev/null +++ b/run-13/checkpoint-96/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dbf03bf644af79257aec95c925042cb81a469bfcc7a839a95d68f1d0425513 +size 14244 diff --git a/run-13/checkpoint-96/scheduler.pt b/run-13/checkpoint-96/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d833c25b62cefc073614b7cc6add8a9a5ae41662 --- /dev/null +++ b/run-13/checkpoint-96/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c63af2dd8f0e1bb01f0d7e66947ad84d12f8c6c65a2423368262dc1e83a7ab1 +size 1064 diff --git a/run-13/checkpoint-96/trainer_state.json b/run-13/checkpoint-96/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..506f20ede7a372e1fd508460b4343e8310460e84 --- /dev/null +++ b/run-13/checkpoint-96/trainer_state.json @@ -0,0 +1,175 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-13/checkpoint-48", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 96, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2512309551239014, + "learning_rate": 1.034188035217011e-05, + "loss": 0.7013, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.0938308238983154, + "learning_rate": 2.068376070434022e-05, + "loss": 0.6709, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.029103398323059, + "learning_rate": 3.102564105651033e-05, + "loss": 0.676, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.7623715400695801, + "learning_rate": 4.136752140868044e-05, + "loss": 0.6371, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.6755096912384033, + "learning_rate": 5.170940176085055e-05, + "loss": 0.6148, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.48364195227622986, + "learning_rate": 6.205128211302065e-05, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 3.724107503890991, + "learning_rate": 7.239316246519077e-05, + "loss": 0.5257, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.4992948770523071, + "learning_rate": 8.273504281736088e-05, + "loss": 0.7212, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 4.220996856689453, + "learning_rate": 9.307692316953099e-05, + "loss": 0.6503, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7372970581054688, + "eval_runtime": 1.384, + "eval_samples_per_second": 46.242, + "eval_steps_per_second": 5.78, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.6990180015563965, + "learning_rate": 9.882241225406994e-05, + "loss": 0.6895, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 1.5386626720428467, + "learning_rate": 9.767331443716214e-05, + "loss": 0.6802, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 1.7925231456756592, + "learning_rate": 9.652421662025435e-05, + "loss": 0.4231, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 1.0101344585418701, + "learning_rate": 9.560493836672811e-05, + "loss": 0.5768, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.2472072839736938, + "learning_rate": 9.445584054982033e-05, + "loss": 0.5095, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": 3.8222692012786865, + "learning_rate": 9.330674273291253e-05, + "loss": 0.8618, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.367830276489258, + "learning_rate": 9.215764491600475e-05, + "loss": 0.6321, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.2242908477783203, + "learning_rate": 9.100854709909696e-05, + "loss": 0.6086, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.1424415111541748, + "learning_rate": 8.985944928218916e-05, + "loss": 0.6172, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 1.9207895994186401, + "learning_rate": 8.871035146528138e-05, + "loss": 0.6318, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6706695556640625, + "eval_runtime": 1.3744, + "eval_samples_per_second": 46.567, + "eval_steps_per_second": 5.821, + "step": 96 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1442567462539200.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 9.928205138083305e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-13/checkpoint-96/training_args.bin b/run-13/checkpoint-96/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e56ba67a8f472c421da9b268a6a057d0272d2b2 --- /dev/null +++ b/run-13/checkpoint-96/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6181d77c6ec1f535fee0fbb436cf5e7d6cbe4ec68fcd3368e6e1056eece160ff +size 4920 diff --git a/run-14/checkpoint-144/config.json b/run-14/checkpoint-144/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-14/checkpoint-144/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-14/checkpoint-144/model.safetensors b/run-14/checkpoint-144/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36689bb7b31389f06e3a0848afa102b3d883185f --- /dev/null +++ b/run-14/checkpoint-144/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4f9758ea480b5414cd4643e6fb5ae119b64487091753c4af492052263ff061 +size 94763496 diff --git a/run-14/checkpoint-144/optimizer.pt b/run-14/checkpoint-144/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..466c020cf0bb3052404be1057d04aace849dbbd4 --- /dev/null +++ b/run-14/checkpoint-144/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c82c386655c5e3c53096812ec800237e6815adf2ae47b1765069888f4bc9b7a1 +size 189552570 diff --git a/run-14/checkpoint-144/preprocessor_config.json b/run-14/checkpoint-144/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-14/checkpoint-144/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-14/checkpoint-144/rng_state.pth b/run-14/checkpoint-144/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..35674c44088a3585f8cd11a1eb144d356856a804 --- /dev/null +++ b/run-14/checkpoint-144/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b8bdd7b4355fd23f0b8256efb0158e4240e11263e992a13d50944c37692500 +size 14244 diff --git a/run-14/checkpoint-144/scheduler.pt b/run-14/checkpoint-144/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7422bdfcd5e7b6a5edb719b17f5b753b1e00bcc0 --- /dev/null +++ b/run-14/checkpoint-144/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51de9a2f2a1ea0a358d768e1268974f6355f99f9552b45c19d9f7b2227d52121 +size 1064 diff --git a/run-14/checkpoint-144/trainer_state.json b/run-14/checkpoint-144/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2c632070b19c52b57c561f61af11955bdfd15188 --- /dev/null +++ b/run-14/checkpoint-144/trainer_state.json @@ -0,0 +1,247 @@ +{ + "best_metric": 0.7346938775510203, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-14/checkpoint-144", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 144, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.263960838317871, + "learning_rate": 4.556569949810922e-06, + "loss": 0.7021, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1750551462173462, + "learning_rate": 9.113139899621844e-06, + "loss": 0.6857, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.3693294525146484, + "learning_rate": 1.3669709849432766e-05, + "loss": 0.6849, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.9440478086471558, + "learning_rate": 1.822627979924369e-05, + "loss": 0.6652, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.9551452398300171, + "learning_rate": 2.2782849749054612e-05, + "loss": 0.64, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.777751088142395, + "learning_rate": 2.7339419698865533e-05, + "loss": 0.6269, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.4816713333129883, + "learning_rate": 3.1895989648676456e-05, + "loss": 0.5586, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.696999430656433, + "learning_rate": 3.645255959848738e-05, + "loss": 0.6586, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.2336232662200928, + "learning_rate": 4.10091295482983e-05, + "loss": 0.6259, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7427406311035156, + "eval_runtime": 1.4051, + "eval_samples_per_second": 45.548, + "eval_steps_per_second": 5.694, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.544165134429932, + "learning_rate": 4.354055729819326e-05, + "loss": 0.7, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.8700355887413025, + "learning_rate": 4.3034271748214265e-05, + "loss": 0.6972, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 1.7657314538955688, + "learning_rate": 4.252798619823527e-05, + "loss": 0.4085, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.6718289852142334, + "learning_rate": 4.202170064825628e-05, + "loss": 0.5618, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.5080432891845703, + "learning_rate": 4.151541509827729e-05, + "loss": 0.4822, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 4.11103866582941e-05, + "loss": 0.7679, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 1.8398224115371704, + "learning_rate": 4.0604101108315106e-05, + "loss": 0.6183, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.7079250812530518, + "learning_rate": 4.0097815558336114e-05, + "loss": 0.5954, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8899829387664795, + "learning_rate": 3.959153000835712e-05, + "loss": 0.596, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.904428243637085, + "learning_rate": 3.908524445837813e-05, + "loss": 0.6094, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.673431396484375, + "eval_runtime": 1.3605, + "eval_samples_per_second": 47.041, + "eval_steps_per_second": 5.88, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.0948293209075928, + "learning_rate": 3.857895890839914e-05, + "loss": 0.5749, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.2664576768875122, + "learning_rate": 3.807267335842015e-05, + "loss": 0.4808, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.7244511842727661, + "learning_rate": 3.7566387808441155e-05, + "loss": 0.5641, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.1561750173568726, + "learning_rate": 3.706010225846216e-05, + "loss": 0.5395, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": Infinity, + "learning_rate": 3.665507381847897e-05, + "loss": 0.5224, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 6.8102827072143555, + "learning_rate": 3.614878826849998e-05, + "loss": 0.4427, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": Infinity, + "learning_rate": 3.574375982851679e-05, + "loss": 0.6088, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 3.278684139251709, + "learning_rate": 3.52374742785378e-05, + "loss": 0.3563, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 4.377391815185547, + "learning_rate": 3.4731188728558806e-05, + "loss": 0.3177, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.7346938775510203, + "eval_loss": 0.804656982421875, + "eval_runtime": 1.3676, + "eval_samples_per_second": 46.796, + "eval_steps_per_second": 5.85, + "step": 144 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2121874430755872.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 4.374307151818485e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-14/checkpoint-144/training_args.bin b/run-14/checkpoint-144/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..26651e49c7467e1c295e5f44f6005b7925d473db --- /dev/null +++ b/run-14/checkpoint-144/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08323d3f888db4d96cf1275214c0bc1494a12c503c7492df7675d3c2976246ad +size 4920 diff --git a/run-14/checkpoint-48/config.json b/run-14/checkpoint-48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-14/checkpoint-48/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-14/checkpoint-48/model.safetensors b/run-14/checkpoint-48/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac415e4f45c230176c06043f8aa455a16e3715db --- /dev/null +++ b/run-14/checkpoint-48/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc860643e0442d477e3db2d795cea596ec406f49262f0b2921abc98e53a278a +size 94763496 diff --git a/run-14/checkpoint-48/optimizer.pt b/run-14/checkpoint-48/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d3dbdedad1a472660a4e037c2ede1e198e2975e --- /dev/null +++ b/run-14/checkpoint-48/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41f0c7d3378515129e4de3df8bdd50f14018dea647741bfa2a5a7b0160fb69c8 +size 189552570 diff --git a/run-14/checkpoint-48/preprocessor_config.json b/run-14/checkpoint-48/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-14/checkpoint-48/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-14/checkpoint-48/rng_state.pth b/run-14/checkpoint-48/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f69ac2b3cc24a2d23f1e99dfab26d0a1d84a680 --- /dev/null +++ b/run-14/checkpoint-48/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7251f0e64bf9e5675ed89b468a7ff74c1c3fd6457742f84db0e5e361db11f13 +size 14244 diff --git a/run-14/checkpoint-48/scheduler.pt b/run-14/checkpoint-48/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccc7fc2fe30e9e32bd3adb6f3a487d1d9751ab1f --- /dev/null +++ b/run-14/checkpoint-48/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:850ccdd16609d418223240119cbf87863f10dc2294422d17358d1418f070270d +size 1064 diff --git a/run-14/checkpoint-48/trainer_state.json b/run-14/checkpoint-48/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c20fec4e6322e201bccb743aa19331230dcc89ec --- /dev/null +++ b/run-14/checkpoint-48/trainer_state.json @@ -0,0 +1,96 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-14/checkpoint-48", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 48, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.263960838317871, + "learning_rate": 4.556569949810922e-06, + "loss": 0.7021, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1750551462173462, + "learning_rate": 9.113139899621844e-06, + "loss": 0.6857, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.3693294525146484, + "learning_rate": 1.3669709849432766e-05, + "loss": 0.6849, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.9440478086471558, + "learning_rate": 1.822627979924369e-05, + "loss": 0.6652, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.9551452398300171, + "learning_rate": 2.2782849749054612e-05, + "loss": 0.64, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.777751088142395, + "learning_rate": 2.7339419698865533e-05, + "loss": 0.6269, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.4816713333129883, + "learning_rate": 3.1895989648676456e-05, + "loss": 0.5586, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.696999430656433, + "learning_rate": 3.645255959848738e-05, + "loss": 0.6586, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.2336232662200928, + "learning_rate": 4.10091295482983e-05, + "loss": 0.6259, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7427406311035156, + "eval_runtime": 1.4051, + "eval_samples_per_second": 45.548, + "eval_steps_per_second": 5.694, + "step": 48 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 670686130935120.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 4.374307151818485e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-14/checkpoint-48/training_args.bin b/run-14/checkpoint-48/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..26651e49c7467e1c295e5f44f6005b7925d473db --- /dev/null +++ b/run-14/checkpoint-48/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08323d3f888db4d96cf1275214c0bc1494a12c503c7492df7675d3c2976246ad +size 4920 diff --git a/run-14/checkpoint-96/config.json b/run-14/checkpoint-96/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-14/checkpoint-96/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-14/checkpoint-96/model.safetensors b/run-14/checkpoint-96/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c2387bcc28ad877f2f67d4f7a3b1c7a82ea234a --- /dev/null +++ b/run-14/checkpoint-96/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:970da1c4501d894071e1745ddd25b3433221a0df24b92676742e39b527c82fba +size 94763496 diff --git a/run-14/checkpoint-96/optimizer.pt b/run-14/checkpoint-96/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..840af41f2376c5b8b21e06a631b5c738bbaabfc3 --- /dev/null +++ b/run-14/checkpoint-96/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:630d07de2c2d75949124972c6fcff497d75f5edc0096dc94b40290bcd0d6109c +size 189552570 diff --git a/run-14/checkpoint-96/preprocessor_config.json b/run-14/checkpoint-96/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-14/checkpoint-96/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-14/checkpoint-96/rng_state.pth b/run-14/checkpoint-96/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5df9532d48eec28233ca1958234673b2505309f1 --- /dev/null +++ b/run-14/checkpoint-96/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dbf03bf644af79257aec95c925042cb81a469bfcc7a839a95d68f1d0425513 +size 14244 diff --git a/run-14/checkpoint-96/scheduler.pt b/run-14/checkpoint-96/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..96b0c7dc4133f26d36fe27c261c7c13a359c7714 --- /dev/null +++ b/run-14/checkpoint-96/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a0a0fb57ed8a0d88d453ecc2fea8f8423b6a7ed1fd472dbd8806bc9412c8db3 +size 1064 diff --git a/run-14/checkpoint-96/trainer_state.json b/run-14/checkpoint-96/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e44fb393b8f239679b437d060414c9d838fbf14f --- /dev/null +++ b/run-14/checkpoint-96/trainer_state.json @@ -0,0 +1,175 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-14/checkpoint-48", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 96, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.263960838317871, + "learning_rate": 4.556569949810922e-06, + "loss": 0.7021, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1750551462173462, + "learning_rate": 9.113139899621844e-06, + "loss": 0.6857, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.3693294525146484, + "learning_rate": 1.3669709849432766e-05, + "loss": 0.6849, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.9440478086471558, + "learning_rate": 1.822627979924369e-05, + "loss": 0.6652, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.9551452398300171, + "learning_rate": 2.2782849749054612e-05, + "loss": 0.64, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.777751088142395, + "learning_rate": 2.7339419698865533e-05, + "loss": 0.6269, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.4816713333129883, + "learning_rate": 3.1895989648676456e-05, + "loss": 0.5586, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.696999430656433, + "learning_rate": 3.645255959848738e-05, + "loss": 0.6586, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.2336232662200928, + "learning_rate": 4.10091295482983e-05, + "loss": 0.6259, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7427406311035156, + "eval_runtime": 1.4051, + "eval_samples_per_second": 45.548, + "eval_steps_per_second": 5.694, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.544165134429932, + "learning_rate": 4.354055729819326e-05, + "loss": 0.7, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.8700355887413025, + "learning_rate": 4.3034271748214265e-05, + "loss": 0.6972, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 1.7657314538955688, + "learning_rate": 4.252798619823527e-05, + "loss": 0.4085, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.6718289852142334, + "learning_rate": 4.202170064825628e-05, + "loss": 0.5618, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.5080432891845703, + "learning_rate": 4.151541509827729e-05, + "loss": 0.4822, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 4.11103866582941e-05, + "loss": 0.7679, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 1.8398224115371704, + "learning_rate": 4.0604101108315106e-05, + "loss": 0.6183, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.7079250812530518, + "learning_rate": 4.0097815558336114e-05, + "loss": 0.5954, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8899829387664795, + "learning_rate": 3.959153000835712e-05, + "loss": 0.596, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.904428243637085, + "learning_rate": 3.908524445837813e-05, + "loss": 0.6094, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.673431396484375, + "eval_runtime": 1.3605, + "eval_samples_per_second": 47.041, + "eval_steps_per_second": 5.88, + "step": 96 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1442567462539200.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 4.374307151818485e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-14/checkpoint-96/training_args.bin b/run-14/checkpoint-96/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..26651e49c7467e1c295e5f44f6005b7925d473db --- /dev/null +++ b/run-14/checkpoint-96/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08323d3f888db4d96cf1275214c0bc1494a12c503c7492df7675d3c2976246ad +size 4920 diff --git a/run-15/checkpoint-144/config.json b/run-15/checkpoint-144/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-15/checkpoint-144/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-15/checkpoint-144/model.safetensors b/run-15/checkpoint-144/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7502002c5d0871386760ccf05346cf1068ea522 --- /dev/null +++ b/run-15/checkpoint-144/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42c518bf247bf5e9f6767c0caab13456b43ce02ffe1b9bf9ee58d643e97bfb30 +size 94763496 diff --git a/run-15/checkpoint-144/optimizer.pt b/run-15/checkpoint-144/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d774763e732c97f9a6cef0d5786622ffcd9ff0aa --- /dev/null +++ b/run-15/checkpoint-144/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32fce45ff5d139e2d8040b716633a02fb2b7e4b50a1963619f6660a864bcc8cf +size 189552570 diff --git a/run-15/checkpoint-144/preprocessor_config.json b/run-15/checkpoint-144/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-15/checkpoint-144/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-15/checkpoint-144/rng_state.pth b/run-15/checkpoint-144/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..35674c44088a3585f8cd11a1eb144d356856a804 --- /dev/null +++ b/run-15/checkpoint-144/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b8bdd7b4355fd23f0b8256efb0158e4240e11263e992a13d50944c37692500 +size 14244 diff --git a/run-15/checkpoint-144/scheduler.pt b/run-15/checkpoint-144/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6399911c05ae9590ec88690d162e51c0b478c05a --- /dev/null +++ b/run-15/checkpoint-144/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:152095c615cec617dc888f35bb2504983feaf9b388cd82a0028e51c7789c4084 +size 1064 diff --git a/run-15/checkpoint-144/trainer_state.json b/run-15/checkpoint-144/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..30bb67b9ba32617865d269c2be11b04f033fffe4 --- /dev/null +++ b/run-15/checkpoint-144/trainer_state.json @@ -0,0 +1,247 @@ +{ + "best_metric": 0.74, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-15/checkpoint-144", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 144, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2651739120483398, + "learning_rate": 3.8109672513839134e-06, + "loss": 0.7021, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1859484910964966, + "learning_rate": 7.621934502767827e-06, + "loss": 0.6876, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.4216609001159668, + "learning_rate": 1.143290175415174e-05, + "loss": 0.6866, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.9709990620613098, + "learning_rate": 1.5243869005535653e-05, + "loss": 0.67, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.9969441890716553, + "learning_rate": 1.9054836256919568e-05, + "loss": 0.647, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.8254842758178711, + "learning_rate": 2.286580350830348e-05, + "loss": 0.634, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.2362442016601562, + "learning_rate": 2.667677075968739e-05, + "loss": 0.5716, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.7431644201278687, + "learning_rate": 3.0487738011071307e-05, + "loss": 0.653, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 2.926987648010254, + "learning_rate": 3.429870526245522e-05, + "loss": 0.6223, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.72772216796875, + "eval_runtime": 1.3907, + "eval_samples_per_second": 46.021, + "eval_steps_per_second": 5.753, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.254884719848633, + "learning_rate": 3.641590929100184e-05, + "loss": 0.6903, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.8755391240119934, + "learning_rate": 3.5992468485292515e-05, + "loss": 0.6891, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 1.7239640951156616, + "learning_rate": 3.556902767958319e-05, + "loss": 0.4136, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.6247442960739136, + "learning_rate": 3.514558687387387e-05, + "loss": 0.5632, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.531494140625, + "learning_rate": 3.4722146068164546e-05, + "loss": 0.4834, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 3.4383393423597085e-05, + "loss": 0.7636, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 1.7097152471542358, + "learning_rate": 3.395995261788776e-05, + "loss": 0.6194, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.788198471069336, + "learning_rate": 3.353651181217844e-05, + "loss": 0.6013, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.9111418724060059, + "learning_rate": 3.311307100646911e-05, + "loss": 0.6008, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 1.2587237358093262, + "learning_rate": 3.268963020075979e-05, + "loss": 0.6136, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6896953582763672, + "eval_runtime": 1.3733, + "eval_samples_per_second": 46.605, + "eval_steps_per_second": 5.826, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 0.9821504950523376, + "learning_rate": 3.2266189395050464e-05, + "loss": 0.5849, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.001234769821167, + "learning_rate": 3.184274858934114e-05, + "loss": 0.4816, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.6126900911331177, + "learning_rate": 3.141930778363182e-05, + "loss": 0.5824, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.0846352577209473, + "learning_rate": 3.0995866977922495e-05, + "loss": 0.5605, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": Infinity, + "learning_rate": 3.0657114333355034e-05, + "loss": 0.5292, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 3.342069625854492, + "learning_rate": 3.023367352764571e-05, + "loss": 0.4602, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 13.714946746826172, + "learning_rate": 2.9894920883078253e-05, + "loss": 0.6429, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.8365912437438965, + "learning_rate": 2.947148007736893e-05, + "loss": 0.38, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 3.0548512935638428, + "learning_rate": 2.9048039271659608e-05, + "loss": 0.3343, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.74, + "eval_loss": 0.7983360290527344, + "eval_runtime": 1.3751, + "eval_samples_per_second": 46.543, + "eval_steps_per_second": 5.818, + "step": 144 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2143403497915440.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 3.658528561328557e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-15/checkpoint-144/training_args.bin b/run-15/checkpoint-144/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b417116bc636c21fa2a16c6c86932f0688f349c --- /dev/null +++ b/run-15/checkpoint-144/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8035c35125d475778fd22c1478d219e4883a1e9849f409c7e91b97e4369435 +size 4920 diff --git a/run-15/checkpoint-48/config.json b/run-15/checkpoint-48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-15/checkpoint-48/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-15/checkpoint-48/model.safetensors b/run-15/checkpoint-48/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f455c53be3c3d2dc7d4a3b91be058faf89ff076f --- /dev/null +++ b/run-15/checkpoint-48/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b0ac0664d1952efa5ec381261615cadec84ad1b87d98959c5647842c2f4ae1a +size 94763496 diff --git a/run-15/checkpoint-48/optimizer.pt b/run-15/checkpoint-48/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ec6b6f3510cec3e0de7c53d278a6e648fe06797 --- /dev/null +++ b/run-15/checkpoint-48/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6981a72855ddc887658e411ec95205a1d7f682f6e8ba536932cdf13929acd033 +size 189552570 diff --git a/run-15/checkpoint-48/preprocessor_config.json b/run-15/checkpoint-48/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-15/checkpoint-48/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-15/checkpoint-48/rng_state.pth b/run-15/checkpoint-48/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f69ac2b3cc24a2d23f1e99dfab26d0a1d84a680 --- /dev/null +++ b/run-15/checkpoint-48/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7251f0e64bf9e5675ed89b468a7ff74c1c3fd6457742f84db0e5e361db11f13 +size 14244 diff --git a/run-15/checkpoint-48/scheduler.pt b/run-15/checkpoint-48/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c4fa1f39a75540c297535d5196132e11bea1193 --- /dev/null +++ b/run-15/checkpoint-48/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62fe0e83cf86e4b000eeec4bdd9dcd38210c2b0e31a7600171eff5435d29d825 +size 1064 diff --git a/run-15/checkpoint-48/trainer_state.json b/run-15/checkpoint-48/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d000fadac31704a35854a60422cbd1b7e6d6e579 --- /dev/null +++ b/run-15/checkpoint-48/trainer_state.json @@ -0,0 +1,96 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-15/checkpoint-48", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 48, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2651739120483398, + "learning_rate": 3.8109672513839134e-06, + "loss": 0.7021, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1859484910964966, + "learning_rate": 7.621934502767827e-06, + "loss": 0.6876, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.4216609001159668, + "learning_rate": 1.143290175415174e-05, + "loss": 0.6866, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.9709990620613098, + "learning_rate": 1.5243869005535653e-05, + "loss": 0.67, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.9969441890716553, + "learning_rate": 1.9054836256919568e-05, + "loss": 0.647, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.8254842758178711, + "learning_rate": 2.286580350830348e-05, + "loss": 0.634, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.2362442016601562, + "learning_rate": 2.667677075968739e-05, + "loss": 0.5716, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.7431644201278687, + "learning_rate": 3.0487738011071307e-05, + "loss": 0.653, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 2.926987648010254, + "learning_rate": 3.429870526245522e-05, + "loss": 0.6223, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.72772216796875, + "eval_runtime": 1.3907, + "eval_samples_per_second": 46.021, + "eval_steps_per_second": 5.753, + "step": 48 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 692215198094688.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 3.658528561328557e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-15/checkpoint-48/training_args.bin b/run-15/checkpoint-48/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b417116bc636c21fa2a16c6c86932f0688f349c --- /dev/null +++ b/run-15/checkpoint-48/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8035c35125d475778fd22c1478d219e4883a1e9849f409c7e91b97e4369435 +size 4920 diff --git a/run-15/checkpoint-96/config.json b/run-15/checkpoint-96/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-15/checkpoint-96/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-15/checkpoint-96/model.safetensors b/run-15/checkpoint-96/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8e85c958e7b176c9f21cbeabfa0ecde5c96fbf0 --- /dev/null +++ b/run-15/checkpoint-96/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f86a74c34fc715e6be68c09a3636d2ae279ff8d5507689e2104af887e4253afd +size 94763496 diff --git a/run-15/checkpoint-96/optimizer.pt b/run-15/checkpoint-96/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..98b958230ac99d85de4b14bcfacd3de58c4f7392 --- /dev/null +++ b/run-15/checkpoint-96/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8802e4dbd23dc72986f266ece212e8a528a80a951c128488ce44a2bcd41ab25 +size 189552570 diff --git a/run-15/checkpoint-96/preprocessor_config.json b/run-15/checkpoint-96/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-15/checkpoint-96/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-15/checkpoint-96/rng_state.pth b/run-15/checkpoint-96/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5df9532d48eec28233ca1958234673b2505309f1 --- /dev/null +++ b/run-15/checkpoint-96/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dbf03bf644af79257aec95c925042cb81a469bfcc7a839a95d68f1d0425513 +size 14244 diff --git a/run-15/checkpoint-96/scheduler.pt b/run-15/checkpoint-96/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f601a29ef878847c4a4e01ea1f65ec11d903ae7 --- /dev/null +++ b/run-15/checkpoint-96/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cda012e180bf6fa64aac7349bebb29b0d25d05ae49f718250eeb707cfec95d42 +size 1064 diff --git a/run-15/checkpoint-96/trainer_state.json b/run-15/checkpoint-96/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2878d103cbbff155a29ad78aeedea4b4e257edd7 --- /dev/null +++ b/run-15/checkpoint-96/trainer_state.json @@ -0,0 +1,175 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-15/checkpoint-48", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 96, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2651739120483398, + "learning_rate": 3.8109672513839134e-06, + "loss": 0.7021, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1859484910964966, + "learning_rate": 7.621934502767827e-06, + "loss": 0.6876, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.4216609001159668, + "learning_rate": 1.143290175415174e-05, + "loss": 0.6866, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.9709990620613098, + "learning_rate": 1.5243869005535653e-05, + "loss": 0.67, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.9969441890716553, + "learning_rate": 1.9054836256919568e-05, + "loss": 0.647, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.8254842758178711, + "learning_rate": 2.286580350830348e-05, + "loss": 0.634, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.2362442016601562, + "learning_rate": 2.667677075968739e-05, + "loss": 0.5716, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.7431644201278687, + "learning_rate": 3.0487738011071307e-05, + "loss": 0.653, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 2.926987648010254, + "learning_rate": 3.429870526245522e-05, + "loss": 0.6223, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.72772216796875, + "eval_runtime": 1.3907, + "eval_samples_per_second": 46.021, + "eval_steps_per_second": 5.753, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.254884719848633, + "learning_rate": 3.641590929100184e-05, + "loss": 0.6903, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.8755391240119934, + "learning_rate": 3.5992468485292515e-05, + "loss": 0.6891, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 1.7239640951156616, + "learning_rate": 3.556902767958319e-05, + "loss": 0.4136, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.6247442960739136, + "learning_rate": 3.514558687387387e-05, + "loss": 0.5632, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.531494140625, + "learning_rate": 3.4722146068164546e-05, + "loss": 0.4834, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 3.4383393423597085e-05, + "loss": 0.7636, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 1.7097152471542358, + "learning_rate": 3.395995261788776e-05, + "loss": 0.6194, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.788198471069336, + "learning_rate": 3.353651181217844e-05, + "loss": 0.6013, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.9111418724060059, + "learning_rate": 3.311307100646911e-05, + "loss": 0.6008, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 1.2587237358093262, + "learning_rate": 3.268963020075979e-05, + "loss": 0.6136, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6896953582763672, + "eval_runtime": 1.3733, + "eval_samples_per_second": 46.605, + "eval_steps_per_second": 5.826, + "step": 96 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1464096529698768.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 3.658528561328557e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-15/checkpoint-96/training_args.bin b/run-15/checkpoint-96/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b417116bc636c21fa2a16c6c86932f0688f349c --- /dev/null +++ b/run-15/checkpoint-96/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8035c35125d475778fd22c1478d219e4883a1e9849f409c7e91b97e4369435 +size 4920 diff --git a/run-16/checkpoint-144/config.json b/run-16/checkpoint-144/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-144/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-144/model.safetensors b/run-16/checkpoint-144/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02423b4f6ced4ee70b7435dc788e875cab47ea04 --- /dev/null +++ b/run-16/checkpoint-144/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50fde7fc8285a435475932caac66368c2853d739ac6ba569a9e93b05ebf2f1eb +size 94763496 diff --git a/run-16/checkpoint-144/optimizer.pt b/run-16/checkpoint-144/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e827640224961d13016813f5846913d6e5bda67d --- /dev/null +++ b/run-16/checkpoint-144/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7cad85ee421fa6a758e9864306ed6d8abfdc54d1cfb576e75aca8717b29ca0 +size 189552570 diff --git a/run-16/checkpoint-144/preprocessor_config.json b/run-16/checkpoint-144/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-144/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-144/rng_state.pth b/run-16/checkpoint-144/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..35674c44088a3585f8cd11a1eb144d356856a804 --- /dev/null +++ b/run-16/checkpoint-144/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87b8bdd7b4355fd23f0b8256efb0158e4240e11263e992a13d50944c37692500 +size 14244 diff --git a/run-16/checkpoint-144/scheduler.pt b/run-16/checkpoint-144/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7acb8c2fe90753d2151606d1a0aed69b6c394e5d --- /dev/null +++ b/run-16/checkpoint-144/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c932084b77cedf427f67e7f86fbfb6dcf2699a07b0a43f9ee59660b85c1400ad +size 1064 diff --git a/run-16/checkpoint-144/trainer_state.json b/run-16/checkpoint-144/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cedecf4a5ef3a2c9346f3822eb36470104d5ec08 --- /dev/null +++ b/run-16/checkpoint-144/trainer_state.json @@ -0,0 +1,247 @@ +{ + "best_metric": 0.74, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-96", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 144, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.276584267616272, + "learning_rate": 5.231896023696631e-05, + "loss": 0.5767, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.1047593355178833, + "learning_rate": 5.163415709250341e-05, + "loss": 0.4715, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.551737666130066, + "learning_rate": 5.0949353948040496e-05, + "loss": 0.5687, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.3372799158096313, + "learning_rate": 5.0264550803577585e-05, + "loss": 0.5399, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 4.043832778930664, + "learning_rate": 4.9716708288007255e-05, + "loss": 0.5178, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 5.607926845550537, + "learning_rate": 4.9031905143544343e-05, + "loss": 0.4201, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 10.094707489013672, + "learning_rate": 4.834710199908144e-05, + "loss": 0.6339, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.790902614593506, + "learning_rate": 4.766229885461853e-05, + "loss": 0.3564, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 8.226430892944336, + "learning_rate": 4.6977495710155616e-05, + "loss": 0.2918, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.72, + "eval_loss": 0.8946094512939453, + "eval_runtime": 1.3813, + "eval_samples_per_second": 46.334, + "eval_steps_per_second": 5.792, + "step": 144 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2143403497915440.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-144/training_args.bin b/run-16/checkpoint-144/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-144/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-192/config.json b/run-16/checkpoint-192/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-192/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-192/model.safetensors b/run-16/checkpoint-192/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15afe1e6f3b3c1d76b155af3328bc57c5af2ccbf --- /dev/null +++ b/run-16/checkpoint-192/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04780d3182e544a9ccba2a8e8b2cb2d4599c102fa2c2a0c73178fef7fb2d49d7 +size 94763496 diff --git a/run-16/checkpoint-192/optimizer.pt b/run-16/checkpoint-192/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d71234e640ccbd48709f45c9f53d0c95bc985fe --- /dev/null +++ b/run-16/checkpoint-192/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c19cdf0bc0cde8e0fa1704348d612eba8c9e30f3fd6ea774864232b15043f716 +size 189552570 diff --git a/run-16/checkpoint-192/preprocessor_config.json b/run-16/checkpoint-192/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-192/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-192/rng_state.pth b/run-16/checkpoint-192/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d76370ee36700e6a498a1fbfff621aca7984a77 --- /dev/null +++ b/run-16/checkpoint-192/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5aef020ca2df517540ac9ff4e195e1c41a7b85939e93195d118078f119bc949 +size 14244 diff --git a/run-16/checkpoint-192/scheduler.pt b/run-16/checkpoint-192/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..620985c5c4b92fa43dd68d0abc514af9927d5b52 --- /dev/null +++ b/run-16/checkpoint-192/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5cd90d01996610d68d126a70590a12456b9e80147f3adeece6d6f9c8dd53c22 +size 1064 diff --git a/run-16/checkpoint-192/trainer_state.json b/run-16/checkpoint-192/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b3856586f23c3854797fca2c2f82bec6feb22bc3 --- /dev/null +++ b/run-16/checkpoint-192/trainer_state.json @@ -0,0 +1,326 @@ +{ + "best_metric": 0.7692307692307693, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-192", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 192, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.276584267616272, + "learning_rate": 5.231896023696631e-05, + "loss": 0.5767, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.1047593355178833, + "learning_rate": 5.163415709250341e-05, + "loss": 0.4715, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.551737666130066, + "learning_rate": 5.0949353948040496e-05, + "loss": 0.5687, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.3372799158096313, + "learning_rate": 5.0264550803577585e-05, + "loss": 0.5399, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 4.043832778930664, + "learning_rate": 4.9716708288007255e-05, + "loss": 0.5178, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 5.607926845550537, + "learning_rate": 4.9031905143544343e-05, + "loss": 0.4201, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 10.094707489013672, + "learning_rate": 4.834710199908144e-05, + "loss": 0.6339, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.790902614593506, + "learning_rate": 4.766229885461853e-05, + "loss": 0.3564, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 8.226430892944336, + "learning_rate": 4.6977495710155616e-05, + "loss": 0.2918, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.72, + "eval_loss": 0.8946094512939453, + "eval_runtime": 1.3813, + "eval_samples_per_second": 46.334, + "eval_steps_per_second": 5.792, + "step": 144 + }, + { + "epoch": 3.02, + "grad_norm": 3.521246910095215, + "learning_rate": 4.6292692565692705e-05, + "loss": 0.4787, + "step": 145 + }, + { + "epoch": 3.12, + "grad_norm": Infinity, + "learning_rate": 4.574485005012238e-05, + "loss": 0.3851, + "step": 150 + }, + { + "epoch": 3.23, + "grad_norm": 3.6489925384521484, + "learning_rate": 4.506004690565947e-05, + "loss": 0.2152, + "step": 155 + }, + { + "epoch": 3.33, + "grad_norm": 9.47097396850586, + "learning_rate": 4.451220439008914e-05, + "loss": 0.6503, + "step": 160 + }, + { + "epoch": 3.44, + "grad_norm": 1.4761028289794922, + "learning_rate": 4.382740124562623e-05, + "loss": 0.177, + "step": 165 + }, + { + "epoch": 3.54, + "grad_norm": 15.07903003692627, + "learning_rate": 4.314259810116332e-05, + "loss": 0.3834, + "step": 170 + }, + { + "epoch": 3.65, + "grad_norm": 98.74039459228516, + "learning_rate": 4.245779495670041e-05, + "loss": 0.816, + "step": 175 + }, + { + "epoch": 3.75, + "grad_norm": 0.6765172481536865, + "learning_rate": 4.17729918122375e-05, + "loss": 0.3177, + "step": 180 + }, + { + "epoch": 3.85, + "grad_norm": 1.5212448835372925, + "learning_rate": 4.108818866777459e-05, + "loss": 0.3593, + "step": 185 + }, + { + "epoch": 3.96, + "grad_norm": 27.127269744873047, + "learning_rate": 4.040338552331168e-05, + "loss": 0.3041, + "step": 190 + }, + { + "epoch": 4.0, + "eval_f1": 0.7692307692307693, + "eval_loss": 1.0041449069976807, + "eval_runtime": 1.3873, + "eval_samples_per_second": 46.134, + "eval_steps_per_second": 5.767, + "step": 192 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2913284122113744.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-192/training_args.bin b/run-16/checkpoint-192/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-192/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-240/config.json b/run-16/checkpoint-240/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-240/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-240/model.safetensors b/run-16/checkpoint-240/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a869d0f1a49c316d304d5f69337849dbbebc55a --- /dev/null +++ b/run-16/checkpoint-240/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19d351f8b800d0bf90cdf75efa4130247df4fea4551b6b1063475e4e4177ab4 +size 94763496 diff --git a/run-16/checkpoint-240/optimizer.pt b/run-16/checkpoint-240/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f89632a92013e7f7890842e87dafc143b08af625 --- /dev/null +++ b/run-16/checkpoint-240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b2786c8419d6ada097db89879b80f68ae9409c81a1a527ebd75b24958c5c0ed +size 189552570 diff --git a/run-16/checkpoint-240/preprocessor_config.json b/run-16/checkpoint-240/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-240/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-240/rng_state.pth b/run-16/checkpoint-240/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6935c8ee6dffa468628cd166bcbf40c96bd4b606 --- /dev/null +++ b/run-16/checkpoint-240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8eb071c49709b6f4047e7f48105f0dd51daaf73e0a11fd742255aa4c3526f42 +size 14244 diff --git a/run-16/checkpoint-240/scheduler.pt b/run-16/checkpoint-240/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c307bb1a982c25869660ef8fc8d77fd0cce88ae --- /dev/null +++ b/run-16/checkpoint-240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a962da1586b62e23453d09012558e669f2b3103b065ee25d26bac70242640d4 +size 1064 diff --git a/run-16/checkpoint-240/trainer_state.json b/run-16/checkpoint-240/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1efd7e7e063ab36aad35b7000c6be788a724a79c --- /dev/null +++ b/run-16/checkpoint-240/trainer_state.json @@ -0,0 +1,405 @@ +{ + "best_metric": 0.8089887640449438, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-240", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 240, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.276584267616272, + "learning_rate": 5.231896023696631e-05, + "loss": 0.5767, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.1047593355178833, + "learning_rate": 5.163415709250341e-05, + "loss": 0.4715, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.551737666130066, + "learning_rate": 5.0949353948040496e-05, + "loss": 0.5687, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.3372799158096313, + "learning_rate": 5.0264550803577585e-05, + "loss": 0.5399, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 4.043832778930664, + "learning_rate": 4.9716708288007255e-05, + "loss": 0.5178, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 5.607926845550537, + "learning_rate": 4.9031905143544343e-05, + "loss": 0.4201, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 10.094707489013672, + "learning_rate": 4.834710199908144e-05, + "loss": 0.6339, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.790902614593506, + "learning_rate": 4.766229885461853e-05, + "loss": 0.3564, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 8.226430892944336, + "learning_rate": 4.6977495710155616e-05, + "loss": 0.2918, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.72, + "eval_loss": 0.8946094512939453, + "eval_runtime": 1.3813, + "eval_samples_per_second": 46.334, + "eval_steps_per_second": 5.792, + "step": 144 + }, + { + "epoch": 3.02, + "grad_norm": 3.521246910095215, + "learning_rate": 4.6292692565692705e-05, + "loss": 0.4787, + "step": 145 + }, + { + "epoch": 3.12, + "grad_norm": Infinity, + "learning_rate": 4.574485005012238e-05, + "loss": 0.3851, + "step": 150 + }, + { + "epoch": 3.23, + "grad_norm": 3.6489925384521484, + "learning_rate": 4.506004690565947e-05, + "loss": 0.2152, + "step": 155 + }, + { + "epoch": 3.33, + "grad_norm": 9.47097396850586, + "learning_rate": 4.451220439008914e-05, + "loss": 0.6503, + "step": 160 + }, + { + "epoch": 3.44, + "grad_norm": 1.4761028289794922, + "learning_rate": 4.382740124562623e-05, + "loss": 0.177, + "step": 165 + }, + { + "epoch": 3.54, + "grad_norm": 15.07903003692627, + "learning_rate": 4.314259810116332e-05, + "loss": 0.3834, + "step": 170 + }, + { + "epoch": 3.65, + "grad_norm": 98.74039459228516, + "learning_rate": 4.245779495670041e-05, + "loss": 0.816, + "step": 175 + }, + { + "epoch": 3.75, + "grad_norm": 0.6765172481536865, + "learning_rate": 4.17729918122375e-05, + "loss": 0.3177, + "step": 180 + }, + { + "epoch": 3.85, + "grad_norm": 1.5212448835372925, + "learning_rate": 4.108818866777459e-05, + "loss": 0.3593, + "step": 185 + }, + { + "epoch": 3.96, + "grad_norm": 27.127269744873047, + "learning_rate": 4.040338552331168e-05, + "loss": 0.3041, + "step": 190 + }, + { + "epoch": 4.0, + "eval_f1": 0.7692307692307693, + "eval_loss": 1.0041449069976807, + "eval_runtime": 1.3873, + "eval_samples_per_second": 46.134, + "eval_steps_per_second": 5.767, + "step": 192 + }, + { + "epoch": 4.06, + "grad_norm": 0.7140729427337646, + "learning_rate": 3.9718582378848774e-05, + "loss": 0.2064, + "step": 195 + }, + { + "epoch": 4.17, + "grad_norm": 4.367918491363525, + "learning_rate": 3.903377923438586e-05, + "loss": 0.365, + "step": 200 + }, + { + "epoch": 4.27, + "grad_norm": 0.4135078489780426, + "learning_rate": 3.834897608992295e-05, + "loss": 0.3944, + "step": 205 + }, + { + "epoch": 4.38, + "grad_norm": 88.720458984375, + "learning_rate": 3.766417294546004e-05, + "loss": 0.3774, + "step": 210 + }, + { + "epoch": 4.48, + "grad_norm": 1.5230985879898071, + "learning_rate": 3.697936980099713e-05, + "loss": 0.0844, + "step": 215 + }, + { + "epoch": 4.58, + "grad_norm": Infinity, + "learning_rate": 3.6431527285426805e-05, + "loss": 0.3702, + "step": 220 + }, + { + "epoch": 4.69, + "grad_norm": 39.243804931640625, + "learning_rate": 3.5746724140963894e-05, + "loss": 0.3381, + "step": 225 + }, + { + "epoch": 4.79, + "grad_norm": 0.23127637803554535, + "learning_rate": 3.506192099650098e-05, + "loss": 0.03, + "step": 230 + }, + { + "epoch": 4.9, + "grad_norm": 23.879899978637695, + "learning_rate": 3.451407848093066e-05, + "loss": 0.1969, + "step": 235 + }, + { + "epoch": 5.0, + "grad_norm": 0.2918482720851898, + "learning_rate": 3.382927533646775e-05, + "loss": 0.2145, + "step": 240 + }, + { + "epoch": 5.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.1479806900024414, + "eval_runtime": 1.3695, + "eval_samples_per_second": 46.731, + "eval_steps_per_second": 5.841, + "step": 240 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3675891927575280.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-240/training_args.bin b/run-16/checkpoint-240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-288/config.json b/run-16/checkpoint-288/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-288/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-288/model.safetensors b/run-16/checkpoint-288/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fbe6e3021d4e9b3c188b342e46defdcefde6c98 --- /dev/null +++ b/run-16/checkpoint-288/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd18a6961504cd478edff149c9a0bd167651311c8f80a7bbeb5ca0b1780d33fc +size 94763496 diff --git a/run-16/checkpoint-288/optimizer.pt b/run-16/checkpoint-288/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e8613259d6eaa83a29d8327f8cba88b63a80a06 --- /dev/null +++ b/run-16/checkpoint-288/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e36b21359036340864d7b80b082ef23864b9e2715dc2516d868caf161fa93c6 +size 189552570 diff --git a/run-16/checkpoint-288/preprocessor_config.json b/run-16/checkpoint-288/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-288/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-288/rng_state.pth b/run-16/checkpoint-288/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..de638b27cad47a23bcba70558a870ddb08a0f7e8 --- /dev/null +++ b/run-16/checkpoint-288/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9903236b654011babeaee26ea70e1c6278fa670549b900c6df1d64732428a642 +size 14244 diff --git a/run-16/checkpoint-288/scheduler.pt b/run-16/checkpoint-288/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0de6c17359bd2cfc32ee9c98c4bb4d263a0e2ad --- /dev/null +++ b/run-16/checkpoint-288/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e827d15e138823d91fc00eca49d12e98f280d927678ce5bb7a93895392de56b +size 1064 diff --git a/run-16/checkpoint-288/trainer_state.json b/run-16/checkpoint-288/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7494d8160c0f07089804fdd4e2c36b6eaa01adbe --- /dev/null +++ b/run-16/checkpoint-288/trainer_state.json @@ -0,0 +1,477 @@ +{ + "best_metric": 0.8089887640449438, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-240", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 288, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.276584267616272, + "learning_rate": 5.231896023696631e-05, + "loss": 0.5767, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.1047593355178833, + "learning_rate": 5.163415709250341e-05, + "loss": 0.4715, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.551737666130066, + "learning_rate": 5.0949353948040496e-05, + "loss": 0.5687, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.3372799158096313, + "learning_rate": 5.0264550803577585e-05, + "loss": 0.5399, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 4.043832778930664, + "learning_rate": 4.9716708288007255e-05, + "loss": 0.5178, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 5.607926845550537, + "learning_rate": 4.9031905143544343e-05, + "loss": 0.4201, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 10.094707489013672, + "learning_rate": 4.834710199908144e-05, + "loss": 0.6339, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.790902614593506, + "learning_rate": 4.766229885461853e-05, + "loss": 0.3564, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 8.226430892944336, + "learning_rate": 4.6977495710155616e-05, + "loss": 0.2918, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.72, + "eval_loss": 0.8946094512939453, + "eval_runtime": 1.3813, + "eval_samples_per_second": 46.334, + "eval_steps_per_second": 5.792, + "step": 144 + }, + { + "epoch": 3.02, + "grad_norm": 3.521246910095215, + "learning_rate": 4.6292692565692705e-05, + "loss": 0.4787, + "step": 145 + }, + { + "epoch": 3.12, + "grad_norm": Infinity, + "learning_rate": 4.574485005012238e-05, + "loss": 0.3851, + "step": 150 + }, + { + "epoch": 3.23, + "grad_norm": 3.6489925384521484, + "learning_rate": 4.506004690565947e-05, + "loss": 0.2152, + "step": 155 + }, + { + "epoch": 3.33, + "grad_norm": 9.47097396850586, + "learning_rate": 4.451220439008914e-05, + "loss": 0.6503, + "step": 160 + }, + { + "epoch": 3.44, + "grad_norm": 1.4761028289794922, + "learning_rate": 4.382740124562623e-05, + "loss": 0.177, + "step": 165 + }, + { + "epoch": 3.54, + "grad_norm": 15.07903003692627, + "learning_rate": 4.314259810116332e-05, + "loss": 0.3834, + "step": 170 + }, + { + "epoch": 3.65, + "grad_norm": 98.74039459228516, + "learning_rate": 4.245779495670041e-05, + "loss": 0.816, + "step": 175 + }, + { + "epoch": 3.75, + "grad_norm": 0.6765172481536865, + "learning_rate": 4.17729918122375e-05, + "loss": 0.3177, + "step": 180 + }, + { + "epoch": 3.85, + "grad_norm": 1.5212448835372925, + "learning_rate": 4.108818866777459e-05, + "loss": 0.3593, + "step": 185 + }, + { + "epoch": 3.96, + "grad_norm": 27.127269744873047, + "learning_rate": 4.040338552331168e-05, + "loss": 0.3041, + "step": 190 + }, + { + "epoch": 4.0, + "eval_f1": 0.7692307692307693, + "eval_loss": 1.0041449069976807, + "eval_runtime": 1.3873, + "eval_samples_per_second": 46.134, + "eval_steps_per_second": 5.767, + "step": 192 + }, + { + "epoch": 4.06, + "grad_norm": 0.7140729427337646, + "learning_rate": 3.9718582378848774e-05, + "loss": 0.2064, + "step": 195 + }, + { + "epoch": 4.17, + "grad_norm": 4.367918491363525, + "learning_rate": 3.903377923438586e-05, + "loss": 0.365, + "step": 200 + }, + { + "epoch": 4.27, + "grad_norm": 0.4135078489780426, + "learning_rate": 3.834897608992295e-05, + "loss": 0.3944, + "step": 205 + }, + { + "epoch": 4.38, + "grad_norm": 88.720458984375, + "learning_rate": 3.766417294546004e-05, + "loss": 0.3774, + "step": 210 + }, + { + "epoch": 4.48, + "grad_norm": 1.5230985879898071, + "learning_rate": 3.697936980099713e-05, + "loss": 0.0844, + "step": 215 + }, + { + "epoch": 4.58, + "grad_norm": Infinity, + "learning_rate": 3.6431527285426805e-05, + "loss": 0.3702, + "step": 220 + }, + { + "epoch": 4.69, + "grad_norm": 39.243804931640625, + "learning_rate": 3.5746724140963894e-05, + "loss": 0.3381, + "step": 225 + }, + { + "epoch": 4.79, + "grad_norm": 0.23127637803554535, + "learning_rate": 3.506192099650098e-05, + "loss": 0.03, + "step": 230 + }, + { + "epoch": 4.9, + "grad_norm": 23.879899978637695, + "learning_rate": 3.451407848093066e-05, + "loss": 0.1969, + "step": 235 + }, + { + "epoch": 5.0, + "grad_norm": 0.2918482720851898, + "learning_rate": 3.382927533646775e-05, + "loss": 0.2145, + "step": 240 + }, + { + "epoch": 5.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.1479806900024414, + "eval_runtime": 1.3695, + "eval_samples_per_second": 46.731, + "eval_steps_per_second": 5.841, + "step": 240 + }, + { + "epoch": 5.1, + "grad_norm": 0.22944150865077972, + "learning_rate": 3.3144472192004837e-05, + "loss": 0.018, + "step": 245 + }, + { + "epoch": 5.21, + "grad_norm": 2.509162425994873, + "learning_rate": 3.245966904754193e-05, + "loss": 0.0209, + "step": 250 + }, + { + "epoch": 5.31, + "grad_norm": 1.8163331747055054, + "learning_rate": 3.177486590307902e-05, + "loss": 0.3779, + "step": 255 + }, + { + "epoch": 5.42, + "grad_norm": 10.014881134033203, + "learning_rate": 3.10900627586161e-05, + "loss": 0.0881, + "step": 260 + }, + { + "epoch": 5.52, + "grad_norm": 0.18950092792510986, + "learning_rate": 3.0405259614153195e-05, + "loss": 0.0141, + "step": 265 + }, + { + "epoch": 5.62, + "grad_norm": 0.19155395030975342, + "learning_rate": 2.9720456469690287e-05, + "loss": 0.142, + "step": 270 + }, + { + "epoch": 5.73, + "grad_norm": 73.96759033203125, + "learning_rate": 2.903565332522738e-05, + "loss": 0.5048, + "step": 275 + }, + { + "epoch": 5.83, + "grad_norm": 0.18716034293174744, + "learning_rate": 2.8350850180764468e-05, + "loss": 0.2629, + "step": 280 + }, + { + "epoch": 5.94, + "grad_norm": 4.706090450286865, + "learning_rate": 2.766604703630156e-05, + "loss": 0.2084, + "step": 285 + }, + { + "epoch": 6.0, + "eval_f1": 0.782608695652174, + "eval_loss": 1.4499547481536865, + "eval_runtime": 1.3743, + "eval_samples_per_second": 46.571, + "eval_steps_per_second": 5.821, + "step": 288 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4378068590420352.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-288/training_args.bin b/run-16/checkpoint-288/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-288/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-336/config.json b/run-16/checkpoint-336/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-336/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-336/model.safetensors b/run-16/checkpoint-336/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d604dfe86ebe7cfcedbcdb9ad50f8f38df5f2944 --- /dev/null +++ b/run-16/checkpoint-336/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:459d51a327bf3d10ffc8b11fc482807bfbb2e17771cbb29aea04ef644c7e0eaf +size 94763496 diff --git a/run-16/checkpoint-336/optimizer.pt b/run-16/checkpoint-336/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab2538edd230c7c19d0c9b9da018a1c3c71b2526 --- /dev/null +++ b/run-16/checkpoint-336/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf5fceaae52c37997458b15f668b390461cd5a1a168fd843cf541ff45fce057 +size 189552570 diff --git a/run-16/checkpoint-336/preprocessor_config.json b/run-16/checkpoint-336/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-336/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-336/rng_state.pth b/run-16/checkpoint-336/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d09c593f640bea3364e14655046e2da93b3ebc1 --- /dev/null +++ b/run-16/checkpoint-336/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d12884ae20f0c926a355fda8650edc055a398d4c7c42545ccdb7d60bd202452 +size 14244 diff --git a/run-16/checkpoint-336/scheduler.pt b/run-16/checkpoint-336/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9068e1e12b0a9705455ec38c5733d4a5333321b6 --- /dev/null +++ b/run-16/checkpoint-336/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:419c33c72e28f877d3139ab2d729dda74bd34855f9abc91f57039cd2c0bfede0 +size 1064 diff --git a/run-16/checkpoint-336/trainer_state.json b/run-16/checkpoint-336/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..faa7cfa3111c72140c7ab121406bd20505fda44d --- /dev/null +++ b/run-16/checkpoint-336/trainer_state.json @@ -0,0 +1,556 @@ +{ + "best_metric": 0.8089887640449438, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-240", + "epoch": 7.0, + "eval_steps": 500, + "global_step": 336, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.276584267616272, + "learning_rate": 5.231896023696631e-05, + "loss": 0.5767, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.1047593355178833, + "learning_rate": 5.163415709250341e-05, + "loss": 0.4715, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.551737666130066, + "learning_rate": 5.0949353948040496e-05, + "loss": 0.5687, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.3372799158096313, + "learning_rate": 5.0264550803577585e-05, + "loss": 0.5399, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 4.043832778930664, + "learning_rate": 4.9716708288007255e-05, + "loss": 0.5178, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 5.607926845550537, + "learning_rate": 4.9031905143544343e-05, + "loss": 0.4201, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 10.094707489013672, + "learning_rate": 4.834710199908144e-05, + "loss": 0.6339, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.790902614593506, + "learning_rate": 4.766229885461853e-05, + "loss": 0.3564, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 8.226430892944336, + "learning_rate": 4.6977495710155616e-05, + "loss": 0.2918, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.72, + "eval_loss": 0.8946094512939453, + "eval_runtime": 1.3813, + "eval_samples_per_second": 46.334, + "eval_steps_per_second": 5.792, + "step": 144 + }, + { + "epoch": 3.02, + "grad_norm": 3.521246910095215, + "learning_rate": 4.6292692565692705e-05, + "loss": 0.4787, + "step": 145 + }, + { + "epoch": 3.12, + "grad_norm": Infinity, + "learning_rate": 4.574485005012238e-05, + "loss": 0.3851, + "step": 150 + }, + { + "epoch": 3.23, + "grad_norm": 3.6489925384521484, + "learning_rate": 4.506004690565947e-05, + "loss": 0.2152, + "step": 155 + }, + { + "epoch": 3.33, + "grad_norm": 9.47097396850586, + "learning_rate": 4.451220439008914e-05, + "loss": 0.6503, + "step": 160 + }, + { + "epoch": 3.44, + "grad_norm": 1.4761028289794922, + "learning_rate": 4.382740124562623e-05, + "loss": 0.177, + "step": 165 + }, + { + "epoch": 3.54, + "grad_norm": 15.07903003692627, + "learning_rate": 4.314259810116332e-05, + "loss": 0.3834, + "step": 170 + }, + { + "epoch": 3.65, + "grad_norm": 98.74039459228516, + "learning_rate": 4.245779495670041e-05, + "loss": 0.816, + "step": 175 + }, + { + "epoch": 3.75, + "grad_norm": 0.6765172481536865, + "learning_rate": 4.17729918122375e-05, + "loss": 0.3177, + "step": 180 + }, + { + "epoch": 3.85, + "grad_norm": 1.5212448835372925, + "learning_rate": 4.108818866777459e-05, + "loss": 0.3593, + "step": 185 + }, + { + "epoch": 3.96, + "grad_norm": 27.127269744873047, + "learning_rate": 4.040338552331168e-05, + "loss": 0.3041, + "step": 190 + }, + { + "epoch": 4.0, + "eval_f1": 0.7692307692307693, + "eval_loss": 1.0041449069976807, + "eval_runtime": 1.3873, + "eval_samples_per_second": 46.134, + "eval_steps_per_second": 5.767, + "step": 192 + }, + { + "epoch": 4.06, + "grad_norm": 0.7140729427337646, + "learning_rate": 3.9718582378848774e-05, + "loss": 0.2064, + "step": 195 + }, + { + "epoch": 4.17, + "grad_norm": 4.367918491363525, + "learning_rate": 3.903377923438586e-05, + "loss": 0.365, + "step": 200 + }, + { + "epoch": 4.27, + "grad_norm": 0.4135078489780426, + "learning_rate": 3.834897608992295e-05, + "loss": 0.3944, + "step": 205 + }, + { + "epoch": 4.38, + "grad_norm": 88.720458984375, + "learning_rate": 3.766417294546004e-05, + "loss": 0.3774, + "step": 210 + }, + { + "epoch": 4.48, + "grad_norm": 1.5230985879898071, + "learning_rate": 3.697936980099713e-05, + "loss": 0.0844, + "step": 215 + }, + { + "epoch": 4.58, + "grad_norm": Infinity, + "learning_rate": 3.6431527285426805e-05, + "loss": 0.3702, + "step": 220 + }, + { + "epoch": 4.69, + "grad_norm": 39.243804931640625, + "learning_rate": 3.5746724140963894e-05, + "loss": 0.3381, + "step": 225 + }, + { + "epoch": 4.79, + "grad_norm": 0.23127637803554535, + "learning_rate": 3.506192099650098e-05, + "loss": 0.03, + "step": 230 + }, + { + "epoch": 4.9, + "grad_norm": 23.879899978637695, + "learning_rate": 3.451407848093066e-05, + "loss": 0.1969, + "step": 235 + }, + { + "epoch": 5.0, + "grad_norm": 0.2918482720851898, + "learning_rate": 3.382927533646775e-05, + "loss": 0.2145, + "step": 240 + }, + { + "epoch": 5.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.1479806900024414, + "eval_runtime": 1.3695, + "eval_samples_per_second": 46.731, + "eval_steps_per_second": 5.841, + "step": 240 + }, + { + "epoch": 5.1, + "grad_norm": 0.22944150865077972, + "learning_rate": 3.3144472192004837e-05, + "loss": 0.018, + "step": 245 + }, + { + "epoch": 5.21, + "grad_norm": 2.509162425994873, + "learning_rate": 3.245966904754193e-05, + "loss": 0.0209, + "step": 250 + }, + { + "epoch": 5.31, + "grad_norm": 1.8163331747055054, + "learning_rate": 3.177486590307902e-05, + "loss": 0.3779, + "step": 255 + }, + { + "epoch": 5.42, + "grad_norm": 10.014881134033203, + "learning_rate": 3.10900627586161e-05, + "loss": 0.0881, + "step": 260 + }, + { + "epoch": 5.52, + "grad_norm": 0.18950092792510986, + "learning_rate": 3.0405259614153195e-05, + "loss": 0.0141, + "step": 265 + }, + { + "epoch": 5.62, + "grad_norm": 0.19155395030975342, + "learning_rate": 2.9720456469690287e-05, + "loss": 0.142, + "step": 270 + }, + { + "epoch": 5.73, + "grad_norm": 73.96759033203125, + "learning_rate": 2.903565332522738e-05, + "loss": 0.5048, + "step": 275 + }, + { + "epoch": 5.83, + "grad_norm": 0.18716034293174744, + "learning_rate": 2.8350850180764468e-05, + "loss": 0.2629, + "step": 280 + }, + { + "epoch": 5.94, + "grad_norm": 4.706090450286865, + "learning_rate": 2.766604703630156e-05, + "loss": 0.2084, + "step": 285 + }, + { + "epoch": 6.0, + "eval_f1": 0.782608695652174, + "eval_loss": 1.4499547481536865, + "eval_runtime": 1.3743, + "eval_samples_per_second": 46.571, + "eval_steps_per_second": 5.821, + "step": 288 + }, + { + "epoch": 6.04, + "grad_norm": 0.12213920056819916, + "learning_rate": 2.698124389183865e-05, + "loss": 0.2129, + "step": 290 + }, + { + "epoch": 6.15, + "grad_norm": 0.1558620184659958, + "learning_rate": 2.6296440747375737e-05, + "loss": 0.0102, + "step": 295 + }, + { + "epoch": 6.25, + "grad_norm": 0.12462862581014633, + "learning_rate": 2.561163760291283e-05, + "loss": 0.0184, + "step": 300 + }, + { + "epoch": 6.35, + "grad_norm": 1.7250605821609497, + "learning_rate": 2.4926834458449918e-05, + "loss": 0.0349, + "step": 305 + }, + { + "epoch": 6.46, + "grad_norm": 0.11989390850067139, + "learning_rate": 2.424203131398701e-05, + "loss": 0.1003, + "step": 310 + }, + { + "epoch": 6.56, + "grad_norm": 0.14550165832042694, + "learning_rate": 2.35572281695241e-05, + "loss": 0.201, + "step": 315 + }, + { + "epoch": 6.67, + "grad_norm": 102.21455383300781, + "learning_rate": 2.287242502506119e-05, + "loss": 0.2073, + "step": 320 + }, + { + "epoch": 6.77, + "grad_norm": 0.330213338136673, + "learning_rate": 2.218762188059828e-05, + "loss": 0.2589, + "step": 325 + }, + { + "epoch": 6.88, + "grad_norm": 1.8776073455810547, + "learning_rate": 2.1502818736135368e-05, + "loss": 0.0065, + "step": 330 + }, + { + "epoch": 6.98, + "grad_norm": 0.09140625596046448, + "learning_rate": 2.081801559167246e-05, + "loss": 0.2754, + "step": 335 + }, + { + "epoch": 7.0, + "eval_f1": 0.7500000000000001, + "eval_loss": 1.7005689144134521, + "eval_runtime": 1.3874, + "eval_samples_per_second": 46.131, + "eval_steps_per_second": 5.766, + "step": 336 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5137916681830272.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-336/training_args.bin b/run-16/checkpoint-336/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-336/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-384/config.json b/run-16/checkpoint-384/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-384/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-384/model.safetensors b/run-16/checkpoint-384/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e67401191c95df3df8d69b3a38df345b967011ee --- /dev/null +++ b/run-16/checkpoint-384/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:517bacffb0c50593c7b95d069f8d5a02aa47232556d6feea86b0f1ae6545cc0e +size 94763496 diff --git a/run-16/checkpoint-384/optimizer.pt b/run-16/checkpoint-384/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bc21d0e622f2a615bad9ab6af96b5c8fd18a290 --- /dev/null +++ b/run-16/checkpoint-384/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a65bac5ea65fa381624852a97eb7a0f1cc84e0ba64f4e7d6841a2fb40a67aba +size 189552570 diff --git a/run-16/checkpoint-384/preprocessor_config.json b/run-16/checkpoint-384/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-384/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-384/rng_state.pth b/run-16/checkpoint-384/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a6bd57b8c17138ed3367d5ca6692d78d760bd47 --- /dev/null +++ b/run-16/checkpoint-384/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5326b9611b4fb9dc5dc0b29580e7e48abf50913e44071592799c052bebfbacd7 +size 14244 diff --git a/run-16/checkpoint-384/scheduler.pt b/run-16/checkpoint-384/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4888f1340ebf255933d17fb0e4bb364851d926e --- /dev/null +++ b/run-16/checkpoint-384/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e170197142da1da992bcf5725da82f7f7d2406a21d1a4428c188f233b702c9 +size 1064 diff --git a/run-16/checkpoint-384/trainer_state.json b/run-16/checkpoint-384/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6d1e9d7165639897cf86a7913247c4dbbe2c7955 --- /dev/null +++ b/run-16/checkpoint-384/trainer_state.json @@ -0,0 +1,628 @@ +{ + "best_metric": 0.8089887640449438, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-240", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 384, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.276584267616272, + "learning_rate": 5.231896023696631e-05, + "loss": 0.5767, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.1047593355178833, + "learning_rate": 5.163415709250341e-05, + "loss": 0.4715, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.551737666130066, + "learning_rate": 5.0949353948040496e-05, + "loss": 0.5687, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.3372799158096313, + "learning_rate": 5.0264550803577585e-05, + "loss": 0.5399, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 4.043832778930664, + "learning_rate": 4.9716708288007255e-05, + "loss": 0.5178, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 5.607926845550537, + "learning_rate": 4.9031905143544343e-05, + "loss": 0.4201, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 10.094707489013672, + "learning_rate": 4.834710199908144e-05, + "loss": 0.6339, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.790902614593506, + "learning_rate": 4.766229885461853e-05, + "loss": 0.3564, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 8.226430892944336, + "learning_rate": 4.6977495710155616e-05, + "loss": 0.2918, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.72, + "eval_loss": 0.8946094512939453, + "eval_runtime": 1.3813, + "eval_samples_per_second": 46.334, + "eval_steps_per_second": 5.792, + "step": 144 + }, + { + "epoch": 3.02, + "grad_norm": 3.521246910095215, + "learning_rate": 4.6292692565692705e-05, + "loss": 0.4787, + "step": 145 + }, + { + "epoch": 3.12, + "grad_norm": Infinity, + "learning_rate": 4.574485005012238e-05, + "loss": 0.3851, + "step": 150 + }, + { + "epoch": 3.23, + "grad_norm": 3.6489925384521484, + "learning_rate": 4.506004690565947e-05, + "loss": 0.2152, + "step": 155 + }, + { + "epoch": 3.33, + "grad_norm": 9.47097396850586, + "learning_rate": 4.451220439008914e-05, + "loss": 0.6503, + "step": 160 + }, + { + "epoch": 3.44, + "grad_norm": 1.4761028289794922, + "learning_rate": 4.382740124562623e-05, + "loss": 0.177, + "step": 165 + }, + { + "epoch": 3.54, + "grad_norm": 15.07903003692627, + "learning_rate": 4.314259810116332e-05, + "loss": 0.3834, + "step": 170 + }, + { + "epoch": 3.65, + "grad_norm": 98.74039459228516, + "learning_rate": 4.245779495670041e-05, + "loss": 0.816, + "step": 175 + }, + { + "epoch": 3.75, + "grad_norm": 0.6765172481536865, + "learning_rate": 4.17729918122375e-05, + "loss": 0.3177, + "step": 180 + }, + { + "epoch": 3.85, + "grad_norm": 1.5212448835372925, + "learning_rate": 4.108818866777459e-05, + "loss": 0.3593, + "step": 185 + }, + { + "epoch": 3.96, + "grad_norm": 27.127269744873047, + "learning_rate": 4.040338552331168e-05, + "loss": 0.3041, + "step": 190 + }, + { + "epoch": 4.0, + "eval_f1": 0.7692307692307693, + "eval_loss": 1.0041449069976807, + "eval_runtime": 1.3873, + "eval_samples_per_second": 46.134, + "eval_steps_per_second": 5.767, + "step": 192 + }, + { + "epoch": 4.06, + "grad_norm": 0.7140729427337646, + "learning_rate": 3.9718582378848774e-05, + "loss": 0.2064, + "step": 195 + }, + { + "epoch": 4.17, + "grad_norm": 4.367918491363525, + "learning_rate": 3.903377923438586e-05, + "loss": 0.365, + "step": 200 + }, + { + "epoch": 4.27, + "grad_norm": 0.4135078489780426, + "learning_rate": 3.834897608992295e-05, + "loss": 0.3944, + "step": 205 + }, + { + "epoch": 4.38, + "grad_norm": 88.720458984375, + "learning_rate": 3.766417294546004e-05, + "loss": 0.3774, + "step": 210 + }, + { + "epoch": 4.48, + "grad_norm": 1.5230985879898071, + "learning_rate": 3.697936980099713e-05, + "loss": 0.0844, + "step": 215 + }, + { + "epoch": 4.58, + "grad_norm": Infinity, + "learning_rate": 3.6431527285426805e-05, + "loss": 0.3702, + "step": 220 + }, + { + "epoch": 4.69, + "grad_norm": 39.243804931640625, + "learning_rate": 3.5746724140963894e-05, + "loss": 0.3381, + "step": 225 + }, + { + "epoch": 4.79, + "grad_norm": 0.23127637803554535, + "learning_rate": 3.506192099650098e-05, + "loss": 0.03, + "step": 230 + }, + { + "epoch": 4.9, + "grad_norm": 23.879899978637695, + "learning_rate": 3.451407848093066e-05, + "loss": 0.1969, + "step": 235 + }, + { + "epoch": 5.0, + "grad_norm": 0.2918482720851898, + "learning_rate": 3.382927533646775e-05, + "loss": 0.2145, + "step": 240 + }, + { + "epoch": 5.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.1479806900024414, + "eval_runtime": 1.3695, + "eval_samples_per_second": 46.731, + "eval_steps_per_second": 5.841, + "step": 240 + }, + { + "epoch": 5.1, + "grad_norm": 0.22944150865077972, + "learning_rate": 3.3144472192004837e-05, + "loss": 0.018, + "step": 245 + }, + { + "epoch": 5.21, + "grad_norm": 2.509162425994873, + "learning_rate": 3.245966904754193e-05, + "loss": 0.0209, + "step": 250 + }, + { + "epoch": 5.31, + "grad_norm": 1.8163331747055054, + "learning_rate": 3.177486590307902e-05, + "loss": 0.3779, + "step": 255 + }, + { + "epoch": 5.42, + "grad_norm": 10.014881134033203, + "learning_rate": 3.10900627586161e-05, + "loss": 0.0881, + "step": 260 + }, + { + "epoch": 5.52, + "grad_norm": 0.18950092792510986, + "learning_rate": 3.0405259614153195e-05, + "loss": 0.0141, + "step": 265 + }, + { + "epoch": 5.62, + "grad_norm": 0.19155395030975342, + "learning_rate": 2.9720456469690287e-05, + "loss": 0.142, + "step": 270 + }, + { + "epoch": 5.73, + "grad_norm": 73.96759033203125, + "learning_rate": 2.903565332522738e-05, + "loss": 0.5048, + "step": 275 + }, + { + "epoch": 5.83, + "grad_norm": 0.18716034293174744, + "learning_rate": 2.8350850180764468e-05, + "loss": 0.2629, + "step": 280 + }, + { + "epoch": 5.94, + "grad_norm": 4.706090450286865, + "learning_rate": 2.766604703630156e-05, + "loss": 0.2084, + "step": 285 + }, + { + "epoch": 6.0, + "eval_f1": 0.782608695652174, + "eval_loss": 1.4499547481536865, + "eval_runtime": 1.3743, + "eval_samples_per_second": 46.571, + "eval_steps_per_second": 5.821, + "step": 288 + }, + { + "epoch": 6.04, + "grad_norm": 0.12213920056819916, + "learning_rate": 2.698124389183865e-05, + "loss": 0.2129, + "step": 290 + }, + { + "epoch": 6.15, + "grad_norm": 0.1558620184659958, + "learning_rate": 2.6296440747375737e-05, + "loss": 0.0102, + "step": 295 + }, + { + "epoch": 6.25, + "grad_norm": 0.12462862581014633, + "learning_rate": 2.561163760291283e-05, + "loss": 0.0184, + "step": 300 + }, + { + "epoch": 6.35, + "grad_norm": 1.7250605821609497, + "learning_rate": 2.4926834458449918e-05, + "loss": 0.0349, + "step": 305 + }, + { + "epoch": 6.46, + "grad_norm": 0.11989390850067139, + "learning_rate": 2.424203131398701e-05, + "loss": 0.1003, + "step": 310 + }, + { + "epoch": 6.56, + "grad_norm": 0.14550165832042694, + "learning_rate": 2.35572281695241e-05, + "loss": 0.201, + "step": 315 + }, + { + "epoch": 6.67, + "grad_norm": 102.21455383300781, + "learning_rate": 2.287242502506119e-05, + "loss": 0.2073, + "step": 320 + }, + { + "epoch": 6.77, + "grad_norm": 0.330213338136673, + "learning_rate": 2.218762188059828e-05, + "loss": 0.2589, + "step": 325 + }, + { + "epoch": 6.88, + "grad_norm": 1.8776073455810547, + "learning_rate": 2.1502818736135368e-05, + "loss": 0.0065, + "step": 330 + }, + { + "epoch": 6.98, + "grad_norm": 0.09140625596046448, + "learning_rate": 2.081801559167246e-05, + "loss": 0.2754, + "step": 335 + }, + { + "epoch": 7.0, + "eval_f1": 0.7500000000000001, + "eval_loss": 1.7005689144134521, + "eval_runtime": 1.3874, + "eval_samples_per_second": 46.131, + "eval_steps_per_second": 5.766, + "step": 336 + }, + { + "epoch": 7.08, + "grad_norm": 25.047473907470703, + "learning_rate": 2.013321244720955e-05, + "loss": 0.2759, + "step": 340 + }, + { + "epoch": 7.19, + "grad_norm": 0.08365596830844879, + "learning_rate": 1.944840930274664e-05, + "loss": 0.0138, + "step": 345 + }, + { + "epoch": 7.29, + "grad_norm": 0.13810297846794128, + "learning_rate": 1.876360615828373e-05, + "loss": 0.0065, + "step": 350 + }, + { + "epoch": 7.4, + "grad_norm": 6.976583003997803, + "learning_rate": 1.8078803013820822e-05, + "loss": 0.0071, + "step": 355 + }, + { + "epoch": 7.5, + "grad_norm": 0.07699556648731232, + "learning_rate": 1.739399986935791e-05, + "loss": 0.0057, + "step": 360 + }, + { + "epoch": 7.6, + "grad_norm": 0.11107699573040009, + "learning_rate": 1.6709196724895e-05, + "loss": 0.005, + "step": 365 + }, + { + "epoch": 7.71, + "grad_norm": 0.2534348964691162, + "learning_rate": 1.602439358043209e-05, + "loss": 0.0116, + "step": 370 + }, + { + "epoch": 7.81, + "grad_norm": 0.06619343161582947, + "learning_rate": 1.533959043596918e-05, + "loss": 0.2779, + "step": 375 + }, + { + "epoch": 7.92, + "grad_norm": 0.0785360187292099, + "learning_rate": 1.465478729150627e-05, + "loss": 0.0047, + "step": 380 + }, + { + "epoch": 8.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.398769497871399, + "eval_runtime": 1.4115, + "eval_samples_per_second": 45.342, + "eval_steps_per_second": 5.668, + "step": 384 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5830312108469328.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-384/training_args.bin b/run-16/checkpoint-384/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-384/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-432/config.json b/run-16/checkpoint-432/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-432/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-432/model.safetensors b/run-16/checkpoint-432/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..adce959bd3a2e51d4feb6bcf7870c29025979edc --- /dev/null +++ b/run-16/checkpoint-432/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52855d139d992ca126fe09cfa7a04caea4826cdc55ab4b855b54e040f9a06800 +size 94763496 diff --git a/run-16/checkpoint-432/optimizer.pt b/run-16/checkpoint-432/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6115aa426d0a686e00f9546ec3fdbc747eb9d13f --- /dev/null +++ b/run-16/checkpoint-432/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a0c6751cdc4c4aaf1fd9128f527e20343c98c923c902d3ce099bbf94e847df +size 189552570 diff --git a/run-16/checkpoint-432/preprocessor_config.json b/run-16/checkpoint-432/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-432/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-432/rng_state.pth b/run-16/checkpoint-432/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bcd8fc524b275ac25e742ea920b25e885f1074e5 --- /dev/null +++ b/run-16/checkpoint-432/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03d982e5865661a49a8729a4369776f96011f643bb66895e82e3a7651ff4f807 +size 14244 diff --git a/run-16/checkpoint-432/scheduler.pt b/run-16/checkpoint-432/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..041f4ee6043c79732a3cc136aab65c3b4b525efb --- /dev/null +++ b/run-16/checkpoint-432/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4830f64afbf12271528443420aa1173685f511072ab5ae0d5e4da5032f42cd7e +size 1064 diff --git a/run-16/checkpoint-432/trainer_state.json b/run-16/checkpoint-432/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5fc96bbd7213c571cfc8532c21a1ae9c591ae8b0 --- /dev/null +++ b/run-16/checkpoint-432/trainer_state.json @@ -0,0 +1,707 @@ +{ + "best_metric": 0.8089887640449438, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-240", + "epoch": 9.0, + "eval_steps": 500, + "global_step": 432, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.276584267616272, + "learning_rate": 5.231896023696631e-05, + "loss": 0.5767, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.1047593355178833, + "learning_rate": 5.163415709250341e-05, + "loss": 0.4715, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.551737666130066, + "learning_rate": 5.0949353948040496e-05, + "loss": 0.5687, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.3372799158096313, + "learning_rate": 5.0264550803577585e-05, + "loss": 0.5399, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 4.043832778930664, + "learning_rate": 4.9716708288007255e-05, + "loss": 0.5178, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 5.607926845550537, + "learning_rate": 4.9031905143544343e-05, + "loss": 0.4201, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 10.094707489013672, + "learning_rate": 4.834710199908144e-05, + "loss": 0.6339, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.790902614593506, + "learning_rate": 4.766229885461853e-05, + "loss": 0.3564, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 8.226430892944336, + "learning_rate": 4.6977495710155616e-05, + "loss": 0.2918, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.72, + "eval_loss": 0.8946094512939453, + "eval_runtime": 1.3813, + "eval_samples_per_second": 46.334, + "eval_steps_per_second": 5.792, + "step": 144 + }, + { + "epoch": 3.02, + "grad_norm": 3.521246910095215, + "learning_rate": 4.6292692565692705e-05, + "loss": 0.4787, + "step": 145 + }, + { + "epoch": 3.12, + "grad_norm": Infinity, + "learning_rate": 4.574485005012238e-05, + "loss": 0.3851, + "step": 150 + }, + { + "epoch": 3.23, + "grad_norm": 3.6489925384521484, + "learning_rate": 4.506004690565947e-05, + "loss": 0.2152, + "step": 155 + }, + { + "epoch": 3.33, + "grad_norm": 9.47097396850586, + "learning_rate": 4.451220439008914e-05, + "loss": 0.6503, + "step": 160 + }, + { + "epoch": 3.44, + "grad_norm": 1.4761028289794922, + "learning_rate": 4.382740124562623e-05, + "loss": 0.177, + "step": 165 + }, + { + "epoch": 3.54, + "grad_norm": 15.07903003692627, + "learning_rate": 4.314259810116332e-05, + "loss": 0.3834, + "step": 170 + }, + { + "epoch": 3.65, + "grad_norm": 98.74039459228516, + "learning_rate": 4.245779495670041e-05, + "loss": 0.816, + "step": 175 + }, + { + "epoch": 3.75, + "grad_norm": 0.6765172481536865, + "learning_rate": 4.17729918122375e-05, + "loss": 0.3177, + "step": 180 + }, + { + "epoch": 3.85, + "grad_norm": 1.5212448835372925, + "learning_rate": 4.108818866777459e-05, + "loss": 0.3593, + "step": 185 + }, + { + "epoch": 3.96, + "grad_norm": 27.127269744873047, + "learning_rate": 4.040338552331168e-05, + "loss": 0.3041, + "step": 190 + }, + { + "epoch": 4.0, + "eval_f1": 0.7692307692307693, + "eval_loss": 1.0041449069976807, + "eval_runtime": 1.3873, + "eval_samples_per_second": 46.134, + "eval_steps_per_second": 5.767, + "step": 192 + }, + { + "epoch": 4.06, + "grad_norm": 0.7140729427337646, + "learning_rate": 3.9718582378848774e-05, + "loss": 0.2064, + "step": 195 + }, + { + "epoch": 4.17, + "grad_norm": 4.367918491363525, + "learning_rate": 3.903377923438586e-05, + "loss": 0.365, + "step": 200 + }, + { + "epoch": 4.27, + "grad_norm": 0.4135078489780426, + "learning_rate": 3.834897608992295e-05, + "loss": 0.3944, + "step": 205 + }, + { + "epoch": 4.38, + "grad_norm": 88.720458984375, + "learning_rate": 3.766417294546004e-05, + "loss": 0.3774, + "step": 210 + }, + { + "epoch": 4.48, + "grad_norm": 1.5230985879898071, + "learning_rate": 3.697936980099713e-05, + "loss": 0.0844, + "step": 215 + }, + { + "epoch": 4.58, + "grad_norm": Infinity, + "learning_rate": 3.6431527285426805e-05, + "loss": 0.3702, + "step": 220 + }, + { + "epoch": 4.69, + "grad_norm": 39.243804931640625, + "learning_rate": 3.5746724140963894e-05, + "loss": 0.3381, + "step": 225 + }, + { + "epoch": 4.79, + "grad_norm": 0.23127637803554535, + "learning_rate": 3.506192099650098e-05, + "loss": 0.03, + "step": 230 + }, + { + "epoch": 4.9, + "grad_norm": 23.879899978637695, + "learning_rate": 3.451407848093066e-05, + "loss": 0.1969, + "step": 235 + }, + { + "epoch": 5.0, + "grad_norm": 0.2918482720851898, + "learning_rate": 3.382927533646775e-05, + "loss": 0.2145, + "step": 240 + }, + { + "epoch": 5.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.1479806900024414, + "eval_runtime": 1.3695, + "eval_samples_per_second": 46.731, + "eval_steps_per_second": 5.841, + "step": 240 + }, + { + "epoch": 5.1, + "grad_norm": 0.22944150865077972, + "learning_rate": 3.3144472192004837e-05, + "loss": 0.018, + "step": 245 + }, + { + "epoch": 5.21, + "grad_norm": 2.509162425994873, + "learning_rate": 3.245966904754193e-05, + "loss": 0.0209, + "step": 250 + }, + { + "epoch": 5.31, + "grad_norm": 1.8163331747055054, + "learning_rate": 3.177486590307902e-05, + "loss": 0.3779, + "step": 255 + }, + { + "epoch": 5.42, + "grad_norm": 10.014881134033203, + "learning_rate": 3.10900627586161e-05, + "loss": 0.0881, + "step": 260 + }, + { + "epoch": 5.52, + "grad_norm": 0.18950092792510986, + "learning_rate": 3.0405259614153195e-05, + "loss": 0.0141, + "step": 265 + }, + { + "epoch": 5.62, + "grad_norm": 0.19155395030975342, + "learning_rate": 2.9720456469690287e-05, + "loss": 0.142, + "step": 270 + }, + { + "epoch": 5.73, + "grad_norm": 73.96759033203125, + "learning_rate": 2.903565332522738e-05, + "loss": 0.5048, + "step": 275 + }, + { + "epoch": 5.83, + "grad_norm": 0.18716034293174744, + "learning_rate": 2.8350850180764468e-05, + "loss": 0.2629, + "step": 280 + }, + { + "epoch": 5.94, + "grad_norm": 4.706090450286865, + "learning_rate": 2.766604703630156e-05, + "loss": 0.2084, + "step": 285 + }, + { + "epoch": 6.0, + "eval_f1": 0.782608695652174, + "eval_loss": 1.4499547481536865, + "eval_runtime": 1.3743, + "eval_samples_per_second": 46.571, + "eval_steps_per_second": 5.821, + "step": 288 + }, + { + "epoch": 6.04, + "grad_norm": 0.12213920056819916, + "learning_rate": 2.698124389183865e-05, + "loss": 0.2129, + "step": 290 + }, + { + "epoch": 6.15, + "grad_norm": 0.1558620184659958, + "learning_rate": 2.6296440747375737e-05, + "loss": 0.0102, + "step": 295 + }, + { + "epoch": 6.25, + "grad_norm": 0.12462862581014633, + "learning_rate": 2.561163760291283e-05, + "loss": 0.0184, + "step": 300 + }, + { + "epoch": 6.35, + "grad_norm": 1.7250605821609497, + "learning_rate": 2.4926834458449918e-05, + "loss": 0.0349, + "step": 305 + }, + { + "epoch": 6.46, + "grad_norm": 0.11989390850067139, + "learning_rate": 2.424203131398701e-05, + "loss": 0.1003, + "step": 310 + }, + { + "epoch": 6.56, + "grad_norm": 0.14550165832042694, + "learning_rate": 2.35572281695241e-05, + "loss": 0.201, + "step": 315 + }, + { + "epoch": 6.67, + "grad_norm": 102.21455383300781, + "learning_rate": 2.287242502506119e-05, + "loss": 0.2073, + "step": 320 + }, + { + "epoch": 6.77, + "grad_norm": 0.330213338136673, + "learning_rate": 2.218762188059828e-05, + "loss": 0.2589, + "step": 325 + }, + { + "epoch": 6.88, + "grad_norm": 1.8776073455810547, + "learning_rate": 2.1502818736135368e-05, + "loss": 0.0065, + "step": 330 + }, + { + "epoch": 6.98, + "grad_norm": 0.09140625596046448, + "learning_rate": 2.081801559167246e-05, + "loss": 0.2754, + "step": 335 + }, + { + "epoch": 7.0, + "eval_f1": 0.7500000000000001, + "eval_loss": 1.7005689144134521, + "eval_runtime": 1.3874, + "eval_samples_per_second": 46.131, + "eval_steps_per_second": 5.766, + "step": 336 + }, + { + "epoch": 7.08, + "grad_norm": 25.047473907470703, + "learning_rate": 2.013321244720955e-05, + "loss": 0.2759, + "step": 340 + }, + { + "epoch": 7.19, + "grad_norm": 0.08365596830844879, + "learning_rate": 1.944840930274664e-05, + "loss": 0.0138, + "step": 345 + }, + { + "epoch": 7.29, + "grad_norm": 0.13810297846794128, + "learning_rate": 1.876360615828373e-05, + "loss": 0.0065, + "step": 350 + }, + { + "epoch": 7.4, + "grad_norm": 6.976583003997803, + "learning_rate": 1.8078803013820822e-05, + "loss": 0.0071, + "step": 355 + }, + { + "epoch": 7.5, + "grad_norm": 0.07699556648731232, + "learning_rate": 1.739399986935791e-05, + "loss": 0.0057, + "step": 360 + }, + { + "epoch": 7.6, + "grad_norm": 0.11107699573040009, + "learning_rate": 1.6709196724895e-05, + "loss": 0.005, + "step": 365 + }, + { + "epoch": 7.71, + "grad_norm": 0.2534348964691162, + "learning_rate": 1.602439358043209e-05, + "loss": 0.0116, + "step": 370 + }, + { + "epoch": 7.81, + "grad_norm": 0.06619343161582947, + "learning_rate": 1.533959043596918e-05, + "loss": 0.2779, + "step": 375 + }, + { + "epoch": 7.92, + "grad_norm": 0.0785360187292099, + "learning_rate": 1.465478729150627e-05, + "loss": 0.0047, + "step": 380 + }, + { + "epoch": 8.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.398769497871399, + "eval_runtime": 1.4115, + "eval_samples_per_second": 45.342, + "eval_steps_per_second": 5.668, + "step": 384 + }, + { + "epoch": 8.02, + "grad_norm": 0.15715888142585754, + "learning_rate": 1.396998414704336e-05, + "loss": 0.0055, + "step": 385 + }, + { + "epoch": 8.12, + "grad_norm": 0.10979782044887543, + "learning_rate": 1.3285181002580451e-05, + "loss": 0.0043, + "step": 390 + }, + { + "epoch": 8.23, + "grad_norm": 0.06223106011748314, + "learning_rate": 1.2600377858117542e-05, + "loss": 0.2033, + "step": 395 + }, + { + "epoch": 8.33, + "grad_norm": 0.07482080906629562, + "learning_rate": 1.1915574713654632e-05, + "loss": 0.2709, + "step": 400 + }, + { + "epoch": 8.44, + "grad_norm": 0.10627646744251251, + "learning_rate": 1.1230771569191722e-05, + "loss": 0.0043, + "step": 405 + }, + { + "epoch": 8.54, + "grad_norm": 0.10625725984573364, + "learning_rate": 1.0545968424728811e-05, + "loss": 0.0044, + "step": 410 + }, + { + "epoch": 8.65, + "grad_norm": 0.0651024580001831, + "learning_rate": 9.861165280265901e-06, + "loss": 0.0047, + "step": 415 + }, + { + "epoch": 8.75, + "grad_norm": 0.09479642659425735, + "learning_rate": 9.313322764695574e-06, + "loss": 0.1092, + "step": 420 + }, + { + "epoch": 8.85, + "grad_norm": 0.07610337436199188, + "learning_rate": 8.628519620232665e-06, + "loss": 0.0048, + "step": 425 + }, + { + "epoch": 8.96, + "grad_norm": 0.07757245749235153, + "learning_rate": 7.943716475769755e-06, + "loss": 0.0056, + "step": 430 + }, + { + "epoch": 9.0, + "eval_f1": 0.8000000000000002, + "eval_loss": 1.4035369157791138, + "eval_runtime": 1.3971, + "eval_samples_per_second": 45.81, + "eval_steps_per_second": 5.726, + "step": 432 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 6570556792277328.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-432/training_args.bin b/run-16/checkpoint-432/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-432/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-48/config.json b/run-16/checkpoint-48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-48/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-48/model.safetensors b/run-16/checkpoint-48/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0633655c4d5106005102b8364ddd939f90b5610e --- /dev/null +++ b/run-16/checkpoint-48/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de09a4abd2f269b623af7643a43dbd085b1d9ab2f005e1b71f1a4a373e754e7 +size 94763496 diff --git a/run-16/checkpoint-48/optimizer.pt b/run-16/checkpoint-48/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2391c17157cb4e115c03c9056762d4645789c09e --- /dev/null +++ b/run-16/checkpoint-48/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0d8ed0e2e67e8008b1d240931ee532f0e8eba7f5e1ad87dafba787e8fccc02 +size 189552570 diff --git a/run-16/checkpoint-48/preprocessor_config.json b/run-16/checkpoint-48/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-48/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-48/rng_state.pth b/run-16/checkpoint-48/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f69ac2b3cc24a2d23f1e99dfab26d0a1d84a680 --- /dev/null +++ b/run-16/checkpoint-48/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7251f0e64bf9e5675ed89b468a7ff74c1c3fd6457742f84db0e5e361db11f13 +size 14244 diff --git a/run-16/checkpoint-48/scheduler.pt b/run-16/checkpoint-48/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..38cafde98b6ae60e6428249bdb168d45ac641424 --- /dev/null +++ b/run-16/checkpoint-48/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e7596b27b06163040f053a087352b5936de1bdb43c41e1bde43d7f83d191b8 +size 1064 diff --git a/run-16/checkpoint-48/trainer_state.json b/run-16/checkpoint-48/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f1f68bd97db2982b79687684dc3c5c8331bc8f39 --- /dev/null +++ b/run-16/checkpoint-48/trainer_state.json @@ -0,0 +1,96 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-48", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 48, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 692215198094688.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-48/training_args.bin b/run-16/checkpoint-48/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-48/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-480/config.json b/run-16/checkpoint-480/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-480/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-480/model.safetensors b/run-16/checkpoint-480/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ef04131a875856d31a88b030b64f643909521a0 --- /dev/null +++ b/run-16/checkpoint-480/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:773595496246366a3b01f87a3fc643aa0fc7e144ba443236f4c8f0899342ebf5 +size 94763496 diff --git a/run-16/checkpoint-480/optimizer.pt b/run-16/checkpoint-480/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..73c062f83ff2dae84ade8b3df3e36f8418055db8 --- /dev/null +++ b/run-16/checkpoint-480/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63052258b1a484283f73d51de6991c58510f55bc99d5af8500534c1ee218ae9 +size 189552570 diff --git a/run-16/checkpoint-480/preprocessor_config.json b/run-16/checkpoint-480/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-480/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-480/rng_state.pth b/run-16/checkpoint-480/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8ce0733889ec145e042b57c109b1b3747a5e4b8 --- /dev/null +++ b/run-16/checkpoint-480/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50237159e3e933ca82ae35db92d0c845d9cc1581f3410598daa2edb356446877 +size 14244 diff --git a/run-16/checkpoint-480/scheduler.pt b/run-16/checkpoint-480/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f0e23e680be64f2a05172de12d3376350e6e7b6 --- /dev/null +++ b/run-16/checkpoint-480/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcd46cf0e9075481585addedd5562210e8ce643bf15434ed891153838da19238 +size 1064 diff --git a/run-16/checkpoint-480/trainer_state.json b/run-16/checkpoint-480/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fa77548344c5550878aeec8c1f0ce2337af8279f --- /dev/null +++ b/run-16/checkpoint-480/trainer_state.json @@ -0,0 +1,786 @@ +{ + "best_metric": 0.8089887640449438, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-240", + "epoch": 10.0, + "eval_steps": 500, + "global_step": 480, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + }, + { + "epoch": 2.08, + "grad_norm": 1.276584267616272, + "learning_rate": 5.231896023696631e-05, + "loss": 0.5767, + "step": 100 + }, + { + "epoch": 2.19, + "grad_norm": 1.1047593355178833, + "learning_rate": 5.163415709250341e-05, + "loss": 0.4715, + "step": 105 + }, + { + "epoch": 2.29, + "grad_norm": 1.551737666130066, + "learning_rate": 5.0949353948040496e-05, + "loss": 0.5687, + "step": 110 + }, + { + "epoch": 2.4, + "grad_norm": 1.3372799158096313, + "learning_rate": 5.0264550803577585e-05, + "loss": 0.5399, + "step": 115 + }, + { + "epoch": 2.5, + "grad_norm": 4.043832778930664, + "learning_rate": 4.9716708288007255e-05, + "loss": 0.5178, + "step": 120 + }, + { + "epoch": 2.6, + "grad_norm": 5.607926845550537, + "learning_rate": 4.9031905143544343e-05, + "loss": 0.4201, + "step": 125 + }, + { + "epoch": 2.71, + "grad_norm": 10.094707489013672, + "learning_rate": 4.834710199908144e-05, + "loss": 0.6339, + "step": 130 + }, + { + "epoch": 2.81, + "grad_norm": 2.790902614593506, + "learning_rate": 4.766229885461853e-05, + "loss": 0.3564, + "step": 135 + }, + { + "epoch": 2.92, + "grad_norm": 8.226430892944336, + "learning_rate": 4.6977495710155616e-05, + "loss": 0.2918, + "step": 140 + }, + { + "epoch": 3.0, + "eval_f1": 0.72, + "eval_loss": 0.8946094512939453, + "eval_runtime": 1.3813, + "eval_samples_per_second": 46.334, + "eval_steps_per_second": 5.792, + "step": 144 + }, + { + "epoch": 3.02, + "grad_norm": 3.521246910095215, + "learning_rate": 4.6292692565692705e-05, + "loss": 0.4787, + "step": 145 + }, + { + "epoch": 3.12, + "grad_norm": Infinity, + "learning_rate": 4.574485005012238e-05, + "loss": 0.3851, + "step": 150 + }, + { + "epoch": 3.23, + "grad_norm": 3.6489925384521484, + "learning_rate": 4.506004690565947e-05, + "loss": 0.2152, + "step": 155 + }, + { + "epoch": 3.33, + "grad_norm": 9.47097396850586, + "learning_rate": 4.451220439008914e-05, + "loss": 0.6503, + "step": 160 + }, + { + "epoch": 3.44, + "grad_norm": 1.4761028289794922, + "learning_rate": 4.382740124562623e-05, + "loss": 0.177, + "step": 165 + }, + { + "epoch": 3.54, + "grad_norm": 15.07903003692627, + "learning_rate": 4.314259810116332e-05, + "loss": 0.3834, + "step": 170 + }, + { + "epoch": 3.65, + "grad_norm": 98.74039459228516, + "learning_rate": 4.245779495670041e-05, + "loss": 0.816, + "step": 175 + }, + { + "epoch": 3.75, + "grad_norm": 0.6765172481536865, + "learning_rate": 4.17729918122375e-05, + "loss": 0.3177, + "step": 180 + }, + { + "epoch": 3.85, + "grad_norm": 1.5212448835372925, + "learning_rate": 4.108818866777459e-05, + "loss": 0.3593, + "step": 185 + }, + { + "epoch": 3.96, + "grad_norm": 27.127269744873047, + "learning_rate": 4.040338552331168e-05, + "loss": 0.3041, + "step": 190 + }, + { + "epoch": 4.0, + "eval_f1": 0.7692307692307693, + "eval_loss": 1.0041449069976807, + "eval_runtime": 1.3873, + "eval_samples_per_second": 46.134, + "eval_steps_per_second": 5.767, + "step": 192 + }, + { + "epoch": 4.06, + "grad_norm": 0.7140729427337646, + "learning_rate": 3.9718582378848774e-05, + "loss": 0.2064, + "step": 195 + }, + { + "epoch": 4.17, + "grad_norm": 4.367918491363525, + "learning_rate": 3.903377923438586e-05, + "loss": 0.365, + "step": 200 + }, + { + "epoch": 4.27, + "grad_norm": 0.4135078489780426, + "learning_rate": 3.834897608992295e-05, + "loss": 0.3944, + "step": 205 + }, + { + "epoch": 4.38, + "grad_norm": 88.720458984375, + "learning_rate": 3.766417294546004e-05, + "loss": 0.3774, + "step": 210 + }, + { + "epoch": 4.48, + "grad_norm": 1.5230985879898071, + "learning_rate": 3.697936980099713e-05, + "loss": 0.0844, + "step": 215 + }, + { + "epoch": 4.58, + "grad_norm": Infinity, + "learning_rate": 3.6431527285426805e-05, + "loss": 0.3702, + "step": 220 + }, + { + "epoch": 4.69, + "grad_norm": 39.243804931640625, + "learning_rate": 3.5746724140963894e-05, + "loss": 0.3381, + "step": 225 + }, + { + "epoch": 4.79, + "grad_norm": 0.23127637803554535, + "learning_rate": 3.506192099650098e-05, + "loss": 0.03, + "step": 230 + }, + { + "epoch": 4.9, + "grad_norm": 23.879899978637695, + "learning_rate": 3.451407848093066e-05, + "loss": 0.1969, + "step": 235 + }, + { + "epoch": 5.0, + "grad_norm": 0.2918482720851898, + "learning_rate": 3.382927533646775e-05, + "loss": 0.2145, + "step": 240 + }, + { + "epoch": 5.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.1479806900024414, + "eval_runtime": 1.3695, + "eval_samples_per_second": 46.731, + "eval_steps_per_second": 5.841, + "step": 240 + }, + { + "epoch": 5.1, + "grad_norm": 0.22944150865077972, + "learning_rate": 3.3144472192004837e-05, + "loss": 0.018, + "step": 245 + }, + { + "epoch": 5.21, + "grad_norm": 2.509162425994873, + "learning_rate": 3.245966904754193e-05, + "loss": 0.0209, + "step": 250 + }, + { + "epoch": 5.31, + "grad_norm": 1.8163331747055054, + "learning_rate": 3.177486590307902e-05, + "loss": 0.3779, + "step": 255 + }, + { + "epoch": 5.42, + "grad_norm": 10.014881134033203, + "learning_rate": 3.10900627586161e-05, + "loss": 0.0881, + "step": 260 + }, + { + "epoch": 5.52, + "grad_norm": 0.18950092792510986, + "learning_rate": 3.0405259614153195e-05, + "loss": 0.0141, + "step": 265 + }, + { + "epoch": 5.62, + "grad_norm": 0.19155395030975342, + "learning_rate": 2.9720456469690287e-05, + "loss": 0.142, + "step": 270 + }, + { + "epoch": 5.73, + "grad_norm": 73.96759033203125, + "learning_rate": 2.903565332522738e-05, + "loss": 0.5048, + "step": 275 + }, + { + "epoch": 5.83, + "grad_norm": 0.18716034293174744, + "learning_rate": 2.8350850180764468e-05, + "loss": 0.2629, + "step": 280 + }, + { + "epoch": 5.94, + "grad_norm": 4.706090450286865, + "learning_rate": 2.766604703630156e-05, + "loss": 0.2084, + "step": 285 + }, + { + "epoch": 6.0, + "eval_f1": 0.782608695652174, + "eval_loss": 1.4499547481536865, + "eval_runtime": 1.3743, + "eval_samples_per_second": 46.571, + "eval_steps_per_second": 5.821, + "step": 288 + }, + { + "epoch": 6.04, + "grad_norm": 0.12213920056819916, + "learning_rate": 2.698124389183865e-05, + "loss": 0.2129, + "step": 290 + }, + { + "epoch": 6.15, + "grad_norm": 0.1558620184659958, + "learning_rate": 2.6296440747375737e-05, + "loss": 0.0102, + "step": 295 + }, + { + "epoch": 6.25, + "grad_norm": 0.12462862581014633, + "learning_rate": 2.561163760291283e-05, + "loss": 0.0184, + "step": 300 + }, + { + "epoch": 6.35, + "grad_norm": 1.7250605821609497, + "learning_rate": 2.4926834458449918e-05, + "loss": 0.0349, + "step": 305 + }, + { + "epoch": 6.46, + "grad_norm": 0.11989390850067139, + "learning_rate": 2.424203131398701e-05, + "loss": 0.1003, + "step": 310 + }, + { + "epoch": 6.56, + "grad_norm": 0.14550165832042694, + "learning_rate": 2.35572281695241e-05, + "loss": 0.201, + "step": 315 + }, + { + "epoch": 6.67, + "grad_norm": 102.21455383300781, + "learning_rate": 2.287242502506119e-05, + "loss": 0.2073, + "step": 320 + }, + { + "epoch": 6.77, + "grad_norm": 0.330213338136673, + "learning_rate": 2.218762188059828e-05, + "loss": 0.2589, + "step": 325 + }, + { + "epoch": 6.88, + "grad_norm": 1.8776073455810547, + "learning_rate": 2.1502818736135368e-05, + "loss": 0.0065, + "step": 330 + }, + { + "epoch": 6.98, + "grad_norm": 0.09140625596046448, + "learning_rate": 2.081801559167246e-05, + "loss": 0.2754, + "step": 335 + }, + { + "epoch": 7.0, + "eval_f1": 0.7500000000000001, + "eval_loss": 1.7005689144134521, + "eval_runtime": 1.3874, + "eval_samples_per_second": 46.131, + "eval_steps_per_second": 5.766, + "step": 336 + }, + { + "epoch": 7.08, + "grad_norm": 25.047473907470703, + "learning_rate": 2.013321244720955e-05, + "loss": 0.2759, + "step": 340 + }, + { + "epoch": 7.19, + "grad_norm": 0.08365596830844879, + "learning_rate": 1.944840930274664e-05, + "loss": 0.0138, + "step": 345 + }, + { + "epoch": 7.29, + "grad_norm": 0.13810297846794128, + "learning_rate": 1.876360615828373e-05, + "loss": 0.0065, + "step": 350 + }, + { + "epoch": 7.4, + "grad_norm": 6.976583003997803, + "learning_rate": 1.8078803013820822e-05, + "loss": 0.0071, + "step": 355 + }, + { + "epoch": 7.5, + "grad_norm": 0.07699556648731232, + "learning_rate": 1.739399986935791e-05, + "loss": 0.0057, + "step": 360 + }, + { + "epoch": 7.6, + "grad_norm": 0.11107699573040009, + "learning_rate": 1.6709196724895e-05, + "loss": 0.005, + "step": 365 + }, + { + "epoch": 7.71, + "grad_norm": 0.2534348964691162, + "learning_rate": 1.602439358043209e-05, + "loss": 0.0116, + "step": 370 + }, + { + "epoch": 7.81, + "grad_norm": 0.06619343161582947, + "learning_rate": 1.533959043596918e-05, + "loss": 0.2779, + "step": 375 + }, + { + "epoch": 7.92, + "grad_norm": 0.0785360187292099, + "learning_rate": 1.465478729150627e-05, + "loss": 0.0047, + "step": 380 + }, + { + "epoch": 8.0, + "eval_f1": 0.8089887640449438, + "eval_loss": 1.398769497871399, + "eval_runtime": 1.4115, + "eval_samples_per_second": 45.342, + "eval_steps_per_second": 5.668, + "step": 384 + }, + { + "epoch": 8.02, + "grad_norm": 0.15715888142585754, + "learning_rate": 1.396998414704336e-05, + "loss": 0.0055, + "step": 385 + }, + { + "epoch": 8.12, + "grad_norm": 0.10979782044887543, + "learning_rate": 1.3285181002580451e-05, + "loss": 0.0043, + "step": 390 + }, + { + "epoch": 8.23, + "grad_norm": 0.06223106011748314, + "learning_rate": 1.2600377858117542e-05, + "loss": 0.2033, + "step": 395 + }, + { + "epoch": 8.33, + "grad_norm": 0.07482080906629562, + "learning_rate": 1.1915574713654632e-05, + "loss": 0.2709, + "step": 400 + }, + { + "epoch": 8.44, + "grad_norm": 0.10627646744251251, + "learning_rate": 1.1230771569191722e-05, + "loss": 0.0043, + "step": 405 + }, + { + "epoch": 8.54, + "grad_norm": 0.10625725984573364, + "learning_rate": 1.0545968424728811e-05, + "loss": 0.0044, + "step": 410 + }, + { + "epoch": 8.65, + "grad_norm": 0.0651024580001831, + "learning_rate": 9.861165280265901e-06, + "loss": 0.0047, + "step": 415 + }, + { + "epoch": 8.75, + "grad_norm": 0.09479642659425735, + "learning_rate": 9.313322764695574e-06, + "loss": 0.1092, + "step": 420 + }, + { + "epoch": 8.85, + "grad_norm": 0.07610337436199188, + "learning_rate": 8.628519620232665e-06, + "loss": 0.0048, + "step": 425 + }, + { + "epoch": 8.96, + "grad_norm": 0.07757245749235153, + "learning_rate": 7.943716475769755e-06, + "loss": 0.0056, + "step": 430 + }, + { + "epoch": 9.0, + "eval_f1": 0.8000000000000002, + "eval_loss": 1.4035369157791138, + "eval_runtime": 1.3971, + "eval_samples_per_second": 45.81, + "eval_steps_per_second": 5.726, + "step": 432 + }, + { + "epoch": 9.06, + "grad_norm": 0.07883503288030624, + "learning_rate": 7.258913331306845e-06, + "loss": 0.0043, + "step": 435 + }, + { + "epoch": 9.17, + "grad_norm": 0.07094341516494751, + "learning_rate": 6.574110186843934e-06, + "loss": 0.0034, + "step": 440 + }, + { + "epoch": 9.27, + "grad_norm": 0.20597043633460999, + "learning_rate": 5.889307042381025e-06, + "loss": 0.0042, + "step": 445 + }, + { + "epoch": 9.38, + "grad_norm": 0.12582358717918396, + "learning_rate": 5.204503897918115e-06, + "loss": 0.0041, + "step": 450 + }, + { + "epoch": 9.48, + "grad_norm": 0.07050727307796478, + "learning_rate": 4.5197007534552054e-06, + "loss": 0.0061, + "step": 455 + }, + { + "epoch": 9.58, + "grad_norm": 0.07429838180541992, + "learning_rate": 3.834897608992295e-06, + "loss": 0.0043, + "step": 460 + }, + { + "epoch": 9.69, + "grad_norm": 0.0814225897192955, + "learning_rate": 3.1500944645293854e-06, + "loss": 0.0037, + "step": 465 + }, + { + "epoch": 9.79, + "grad_norm": 0.08201207220554352, + "learning_rate": 2.4652913200664753e-06, + "loss": 0.0035, + "step": 470 + }, + { + "epoch": 9.9, + "grad_norm": 0.06246696412563324, + "learning_rate": 1.7804881756035655e-06, + "loss": 0.3188, + "step": 475 + }, + { + "epoch": 10.0, + "grad_norm": 0.12704825401306152, + "learning_rate": 1.0956850311406557e-06, + "loss": 0.0036, + "step": 480 + }, + { + "epoch": 10.0, + "eval_f1": 0.7857142857142857, + "eval_loss": 1.4464069604873657, + "eval_runtime": 1.4226, + "eval_samples_per_second": 44.988, + "eval_steps_per_second": 5.624, + "step": 480 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7328048267547168.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-480/training_args.bin b/run-16/checkpoint-480/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-480/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-16/checkpoint-96/config.json b/run-16/checkpoint-96/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-16/checkpoint-96/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-16/checkpoint-96/model.safetensors b/run-16/checkpoint-96/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6fa422ed524138501e0df51464166456890e5f8 --- /dev/null +++ b/run-16/checkpoint-96/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88af3ce517beea7876985d0c9efab948e5f90dbaab4f9f9026990b48aced3c0b +size 94763496 diff --git a/run-16/checkpoint-96/optimizer.pt b/run-16/checkpoint-96/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..013108b2f9486e32a8c65a8a799a027a78a5114b --- /dev/null +++ b/run-16/checkpoint-96/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:701c6779d94c7023a78a4c8d3a0b2fa80238396bf72a9dd14dc8ff40302fd577 +size 189552570 diff --git a/run-16/checkpoint-96/preprocessor_config.json b/run-16/checkpoint-96/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-16/checkpoint-96/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-16/checkpoint-96/rng_state.pth b/run-16/checkpoint-96/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5df9532d48eec28233ca1958234673b2505309f1 --- /dev/null +++ b/run-16/checkpoint-96/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39dbf03bf644af79257aec95c925042cb81a469bfcc7a839a95d68f1d0425513 +size 14244 diff --git a/run-16/checkpoint-96/scheduler.pt b/run-16/checkpoint-96/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec3d0fc9852e1c84e99e6017919d4a95e845c4b3 --- /dev/null +++ b/run-16/checkpoint-96/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f2fbefd15ddb5c6fc274f8952efb63a9db2899c2ca4da9ce8a744d90abd37f8 +size 1064 diff --git a/run-16/checkpoint-96/trainer_state.json b/run-16/checkpoint-96/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..91729f42282d86caba504ee5d44a040b6778910d --- /dev/null +++ b/run-16/checkpoint-96/trainer_state.json @@ -0,0 +1,175 @@ +{ + "best_metric": 0.74, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-16/checkpoint-96", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 96, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2602354288101196, + "learning_rate": 6.163228300166189e-06, + "loss": 0.7018, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.1513686180114746, + "learning_rate": 1.2326456600332378e-05, + "loss": 0.6815, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.21480131149292, + "learning_rate": 1.8489684900498564e-05, + "loss": 0.6818, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8887011408805847, + "learning_rate": 2.4652913200664756e-05, + "loss": 0.6556, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.8671120405197144, + "learning_rate": 3.081614150083095e-05, + "loss": 0.6284, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.6801092028617859, + "learning_rate": 3.697936980099713e-05, + "loss": 0.6166, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 2.9359517097473145, + "learning_rate": 4.314259810116332e-05, + "loss": 0.5407, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.6087604761123657, + "learning_rate": 4.930582640132951e-05, + "loss": 0.6755, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.7170913219451904, + "learning_rate": 5.54690547014957e-05, + "loss": 0.6359, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7614002227783203, + "eval_runtime": 1.3801, + "eval_samples_per_second": 46.373, + "eval_steps_per_second": 5.797, + "step": 48 + }, + { + "epoch": 1.04, + "grad_norm": 4.872888565063477, + "learning_rate": 5.889307042381025e-05, + "loss": 0.7115, + "step": 50 + }, + { + "epoch": 1.15, + "grad_norm": 0.9400704503059387, + "learning_rate": 5.820826727934734e-05, + "loss": 0.7024, + "step": 55 + }, + { + "epoch": 1.25, + "grad_norm": 2.040548086166382, + "learning_rate": 5.752346413488443e-05, + "loss": 0.4111, + "step": 60 + }, + { + "epoch": 1.35, + "grad_norm": 0.7833982706069946, + "learning_rate": 5.6838660990421516e-05, + "loss": 0.5595, + "step": 65 + }, + { + "epoch": 1.46, + "grad_norm": 1.4219136238098145, + "learning_rate": 5.615385784595861e-05, + "loss": 0.4831, + "step": 70 + }, + { + "epoch": 1.56, + "grad_norm": Infinity, + "learning_rate": 5.560601533038828e-05, + "loss": 0.7837, + "step": 75 + }, + { + "epoch": 1.67, + "grad_norm": 2.069016218185425, + "learning_rate": 5.492121218592537e-05, + "loss": 0.62, + "step": 80 + }, + { + "epoch": 1.77, + "grad_norm": 1.6181252002716064, + "learning_rate": 5.423640904146246e-05, + "loss": 0.5911, + "step": 85 + }, + { + "epoch": 1.88, + "grad_norm": 1.8916351795196533, + "learning_rate": 5.355160589699955e-05, + "loss": 0.5905, + "step": 90 + }, + { + "epoch": 1.98, + "grad_norm": 3.4919614791870117, + "learning_rate": 5.286680275253664e-05, + "loss": 0.5972, + "step": 95 + }, + { + "epoch": 2.0, + "eval_f1": 0.74, + "eval_loss": 0.6621341705322266, + "eval_runtime": 1.3734, + "eval_samples_per_second": 46.601, + "eval_steps_per_second": 5.825, + "step": 96 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1464096529698768.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 5.916699168159541e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-16/checkpoint-96/training_args.bin b/run-16/checkpoint-96/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..18efe4f85a261944fb187f56c8cfa182c42f8e35 --- /dev/null +++ b/run-16/checkpoint-96/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00bdab3162ed6676e0bdfcdd5379976fc4a6b2c896fd6e20da443dfbeac5a8 +size 4920 diff --git a/run-17/checkpoint-48/config.json b/run-17/checkpoint-48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-17/checkpoint-48/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-17/checkpoint-48/model.safetensors b/run-17/checkpoint-48/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87cdb3217aa5c39882779809f1c96596fd16c1b4 --- /dev/null +++ b/run-17/checkpoint-48/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa9c57c1a422417816b316606c2d878a3bcb3f7c6d7607c8f81010ace74cee3 +size 94763496 diff --git a/run-17/checkpoint-48/optimizer.pt b/run-17/checkpoint-48/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0f3b47fff7111e4861ad419d241eb39abf39109 --- /dev/null +++ b/run-17/checkpoint-48/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4c539da9eebd883cdbd2c5e3bad8e5147be8d45ae44d83a752d2935509179ee +size 189552570 diff --git a/run-17/checkpoint-48/preprocessor_config.json b/run-17/checkpoint-48/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-17/checkpoint-48/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-17/checkpoint-48/rng_state.pth b/run-17/checkpoint-48/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f69ac2b3cc24a2d23f1e99dfab26d0a1d84a680 --- /dev/null +++ b/run-17/checkpoint-48/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7251f0e64bf9e5675ed89b468a7ff74c1c3fd6457742f84db0e5e361db11f13 +size 14244 diff --git a/run-17/checkpoint-48/scheduler.pt b/run-17/checkpoint-48/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..139dc4c01f6ca5235487a92f5d4f90323d791235 --- /dev/null +++ b/run-17/checkpoint-48/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b504d78e3c20e28bad3ba3d9006ea9b061716eebf19d32c0d189efcad7513728 +size 1064 diff --git a/run-17/checkpoint-48/trainer_state.json b/run-17/checkpoint-48/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9b00dd852d16a82948f9c0dc3107f05bfc0d59ec --- /dev/null +++ b/run-17/checkpoint-48/trainer_state.json @@ -0,0 +1,96 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-17/checkpoint-48", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 48, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2570356130599976, + "learning_rate": 7.514138837771986e-06, + "loss": 0.7017, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.132084846496582, + "learning_rate": 1.5028277675543972e-05, + "loss": 0.678, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.1500951051712036, + "learning_rate": 2.2542416513315956e-05, + "loss": 0.6796, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 0.8457872867584229, + "learning_rate": 3.0056555351087943e-05, + "loss": 0.6488, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 0.7979240417480469, + "learning_rate": 3.757069418885993e-05, + "loss": 0.6218, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 0.605031430721283, + "learning_rate": 4.508483302663191e-05, + "loss": 0.612, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 3.2487857341766357, + "learning_rate": 5.2598971864403895e-05, + "loss": 0.5324, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 1.5509129762649536, + "learning_rate": 6.0113110702175886e-05, + "loss": 0.6915, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 3.975639581680298, + "learning_rate": 6.762724953994786e-05, + "loss": 0.643, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7643775939941406, + "eval_runtime": 1.3793, + "eval_samples_per_second": 46.401, + "eval_steps_per_second": 5.8, + "step": 48 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 670686130935120.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 7.213573284261106e-05, + "per_device_train_batch_size": 4 + } +} diff --git a/run-17/checkpoint-48/training_args.bin b/run-17/checkpoint-48/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c3142c9d5adcfcf2aa8fbf1fe02bc558ba2327f4 --- /dev/null +++ b/run-17/checkpoint-48/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a6b489e5b3e9f9bd8287d6a1cc3cd31b4da1f04bd7cd89d76f6c36931f8868 +size 4920 diff --git a/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492925.ca56ea9bc35e.3883.15 b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492925.ca56ea9bc35e.3883.15 new file mode 100644 index 0000000000000000000000000000000000000000..588339587747a77fb3dde972a5d4f33bc1a8bcf6 --- /dev/null +++ b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492925.ca56ea9bc35e.3883.15 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3855514a7cd125fa29110168f50056e8383cba1ef98450a82d5efcfcbd47ffd +size 14693 diff --git a/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492954.ca56ea9bc35e.3883.16 b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492954.ca56ea9bc35e.3883.16 new file mode 100644 index 0000000000000000000000000000000000000000..486d41b61dde3c2b41a2eec1194556e7f6905e54 --- /dev/null +++ b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492954.ca56ea9bc35e.3883.16 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414b901ffd56ecfe1f28ed5a8c15344b7147e7139cb4e54b8e80109d2c706610 +size 14693 diff --git a/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492984.ca56ea9bc35e.3883.17 b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492984.ca56ea9bc35e.3883.17 new file mode 100644 index 0000000000000000000000000000000000000000..f762667d13946f9df5ecea6ac05d5c11122af00e --- /dev/null +++ b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492984.ca56ea9bc35e.3883.17 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d32ea6d5374542dceb3235da0921e8fea530ed2218ba3b97c73f1c82248bfabd +size 29187 diff --git a/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709493147.ca56ea9bc35e.3883.18 b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709493147.ca56ea9bc35e.3883.18 new file mode 100644 index 0000000000000000000000000000000000000000..40be0e398087c3b15e29680e9efaac1b93bce3b5 --- /dev/null +++ b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709493147.ca56ea9bc35e.3883.18 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7c5d8970c3d69e89a55123bc00dd62d76ccc56fb2b00e558108bac7345574e +size 17120 diff --git a/training_args.bin b/training_args.bin index 2e56ba67a8f472c421da9b268a6a057d0272d2b2..c3142c9d5adcfcf2aa8fbf1fe02bc558ba2327f4 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6181d77c6ec1f535fee0fbb436cf5e7d6cbe4ec68fcd3368e6e1056eece160ff +oid sha256:e7a6b489e5b3e9f9bd8287d6a1cc3cd31b4da1f04bd7cd89d76f6c36931f8868 size 4920