diff --git a/README.md b/README.md index dd851dd36b78230b4e36e8f178ac52581673014c..6545c786caf1ad7e7f47d28aadf28549c6617263 100644 --- a/README.md +++ b/README.md @@ -4,26 +4,14 @@ license: apache-2.0 base_model: ntu-spml/distilhubert tags: - generated_from_trainer -datasets: -- audiofolder metrics: - accuracy +- precision +- recall +- f1 model-index: - name: distilhubert-finetuned-mixed-data - results: - - task: - name: Audio Classification - type: audio-classification - dataset: - name: audiofolder - type: audiofolder - config: default - split: train - args: default - metrics: - - name: Accuracy - type: accuracy - value: 0.8691275167785235 + results: [] --- # distilhubert-finetuned-mixed-data -This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on the audiofolder dataset. +This model is a fine-tuned version of [ntu-spml/distilhubert](https://huggingface.co/ntu-spml/distilhubert) on an unknown dataset. It achieves the following results on the evaluation set: -- Loss: 0.3360 -- Accuracy: 0.8691 +- Loss: 0.4765 +- Accuracy: 0.8475 +- Precision: 0.8497 +- Recall: 0.8475 +- F1: 0.8484 ## Model description @@ -61,25 +52,30 @@ The following hyperparameters were used during training: - total_train_batch_size: 64 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 - lr_scheduler_type: cosine -- lr_scheduler_warmup_ratio: 0.001 -- num_epochs: 7 +- lr_scheduler_warmup_ratio: 0.01 +- num_epochs: 15 ### Training results -| Training Loss | Epoch | Step | Validation Loss | Accuracy | -|:-------------:|:------:|:----:|:---------------:|:--------:| -| No log | 0.9664 | 18 | 0.6860 | 0.7584 | -| No log | 1.9866 | 37 | 0.4623 | 0.8020 | -| No log | 2.9530 | 55 | 0.4069 | 0.8188 | -| No log | 3.9732 | 74 | 0.3811 | 0.8356 | -| No log | 4.9933 | 93 | 0.3542 | 0.8356 | -| No log | 5.9597 | 111 | 0.3380 | 0.8658 | -| No log | 6.7651 | 126 | 0.3360 | 0.8691 | +| Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 | +|:-------------:|:-------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:| +| No log | 0.9897 | 24 | 0.9809 | 0.6357 | 0.5920 | 0.6357 | 0.5921 | +| No log | 1.9794 | 48 | 0.7444 | 0.7158 | 0.6992 | 0.7158 | 0.6905 | +| No log | 2.9691 | 72 | 0.6172 | 0.7494 | 0.7438 | 0.7494 | 0.7449 | +| No log | 4.0 | 97 | 0.5431 | 0.7984 | 0.7918 | 0.7984 | 0.7874 | +| No log | 4.9897 | 121 | 0.5269 | 0.8010 | 0.8006 | 0.8010 | 0.7975 | +| No log | 5.9794 | 145 | 0.5811 | 0.7494 | 0.7802 | 0.7494 | 0.7551 | +| No log | 6.9691 | 169 | 0.4408 | 0.8398 | 0.8366 | 0.8398 | 0.8355 | +| No log | 8.0 | 194 | 0.4687 | 0.8191 | 0.8188 | 0.8191 | 0.8168 | +| No log | 8.9897 | 218 | 0.4364 | 0.8475 | 0.8483 | 0.8475 | 0.8474 | +| No log | 9.9794 | 242 | 0.4291 | 0.8579 | 0.8561 | 0.8579 | 0.8568 | +| No log | 10.9691 | 266 | 0.4699 | 0.8501 | 0.8582 | 0.8501 | 0.8528 | +| No log | 12.0 | 291 | 0.4862 | 0.8450 | 0.8536 | 0.8450 | 0.8480 | +| No log | 12.9897 | 315 | 0.4765 | 0.8475 | 0.8497 | 0.8475 | 0.8484 | ### Framework versions - Transformers 4.44.2 -- Pytorch 2.4.0+cu121 -- Datasets 2.21.0 +- Pytorch 2.4.1+cu121 - Tokenizers 0.19.1 diff --git a/checkpoint-121/model.safetensors b/checkpoint-121/model.safetensors index e29faae038fffc5158a7cf3cb010ce498794e987..fe202163d132da27f2e9770dcc07eb16d99c6a47 100644 --- a/checkpoint-121/model.safetensors +++ b/checkpoint-121/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9f17cc1a80f9cdf7f1c5c94c70062ca59ee73223709355f94eae716447e99d02 +oid sha256:61738703583616f26bc5974045f101b4badf661cb1bd5759b55ffedbf42f8309 size 94765560 diff --git a/checkpoint-121/optimizer.pt b/checkpoint-121/optimizer.pt index 76769601d9126bc924a1ab46827d7dfc1c9ff22d..e318b9ab24a4094f4ab5bfb841c4dbba706a697c 100644 --- a/checkpoint-121/optimizer.pt +++ b/checkpoint-121/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:af430e7b5cbd26cc609cc5c918049ec696a387c13101b4292d1bd68dd2695f0e +oid sha256:a04d7aa37706181567dea5d9c17d0a79ae70a3f43b546fd604def5a5c896f325 size 189556666 diff --git a/checkpoint-121/scheduler.pt b/checkpoint-121/scheduler.pt index 1e4892f0948736ed2099e63914ce1d89b8a0af39..43fafcad80ec47eacc2768959ae23c1dd1a9545c 100644 --- a/checkpoint-121/scheduler.pt +++ b/checkpoint-121/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:192b51d0f1e5456c85d7a6eeae550a33132f97495282fa7f439444f5f22297cf +oid sha256:5c4a0a93ff2c02ad8dfec37403a0eff289573d406127ab0a60fb4386bc8bcb2b size 1064 diff --git a/checkpoint-121/trainer_state.json b/checkpoint-121/trainer_state.json index 6962bb91da24b61a27c849f2f364852b7c8b0d88..cb1a830884f63bcb80bc1397fa3a4659f8efaff7 100644 --- a/checkpoint-121/trainer_state.json +++ b/checkpoint-121/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 0.7648578811369509, + "best_metric": 0.8010335917312662, "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121", "epoch": 4.989690721649485, "eval_steps": 500, @@ -10,69 +10,69 @@ "log_history": [ { "epoch": 0.9896907216494846, - "eval_accuracy": 0.6098191214470284, - "eval_f1": 0.5527821682108387, - "eval_loss": 1.0193405151367188, - "eval_precision": 0.7003292379638224, - "eval_recall": 0.6098191214470284, - "eval_runtime": 1.7319, - "eval_samples_per_second": 223.453, - "eval_steps_per_second": 28.293, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, "step": 24 }, { "epoch": 1.9793814432989691, - "eval_accuracy": 0.6795865633074936, - "eval_f1": 0.6650515718181741, - "eval_loss": 0.7780925631523132, - "eval_precision": 0.6877908327609833, - "eval_recall": 0.6795865633074936, - "eval_runtime": 1.7105, - "eval_samples_per_second": 226.247, - "eval_steps_per_second": 28.646, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, "step": 48 }, { "epoch": 2.9690721649484537, - "eval_accuracy": 0.7260981912144703, - "eval_f1": 0.7125387559196679, - "eval_loss": 0.663092315196991, - "eval_precision": 0.7236217153897081, - "eval_recall": 0.7260981912144703, - "eval_runtime": 1.7131, - "eval_samples_per_second": 225.911, - "eval_steps_per_second": 28.604, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, "step": 72 }, { "epoch": 4.0, - "eval_accuracy": 0.7493540051679587, - "eval_f1": 0.7323843170610034, - "eval_loss": 0.5870956778526306, - "eval_precision": 0.7505520883603423, - "eval_recall": 0.7493540051679587, - "eval_runtime": 1.7332, - "eval_samples_per_second": 223.293, - "eval_steps_per_second": 28.272, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, "step": 97 }, { "epoch": 4.989690721649485, - "eval_accuracy": 0.7648578811369509, - "eval_f1": 0.7570271532827265, - "eval_loss": 0.5548774003982544, - "eval_precision": 0.7717080207133444, - "eval_recall": 0.7648578811369509, - "eval_runtime": 1.744, - "eval_samples_per_second": 221.907, - "eval_steps_per_second": 28.097, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, "step": 121 } ], "logging_steps": 500, - "max_steps": 144, + "max_steps": 360, "num_input_tokens_seen": 0, - "num_train_epochs": 6, + "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-121/training_args.bin b/checkpoint-121/training_args.bin index 2523e8b7f8fecd9f3cea42f185f2da0360caee05..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b 100644 --- a/checkpoint-121/training_args.bin +++ b/checkpoint-121/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f758b94689e09a814f1948cbbce56e483b2b53509b111e749d1961a9e2d3ad45 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a size 5240 diff --git a/checkpoint-145/config.json b/checkpoint-145/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d08a8bd80e93984973b2e4be199a10eccbe7cae --- /dev/null +++ b/checkpoint-145/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "finetuning_task": "audio-classification", + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "1s_normal", + "1": "1s_pain", + "2": "1s_hunger", + "3": "1s_asphyxia" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "1s_asphyxia": 3, + "1s_hunger": 2, + "1s_normal": 0, + "1s_pain": 1 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.44.2", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/checkpoint-145/model.safetensors b/checkpoint-145/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..185253bb868e225eac09ed40cbcbcaec575b7826 --- /dev/null +++ b/checkpoint-145/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b1cac8d8be6152086485540750316f43a58660bddd11c298bb50d0f3f5f531 +size 94765560 diff --git a/checkpoint-145/optimizer.pt b/checkpoint-145/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c948e4350a9c78a7a43ea476eb9782b80a480b8c --- /dev/null +++ b/checkpoint-145/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74fa4c7ca2e76ec752e5b913444b39e61cef0b5760afb453b5c55805b47cacc +size 189556666 diff --git a/checkpoint-145/rng_state.pth b/checkpoint-145/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e6f910234c1c71dfa337286bd802c3271b6b027 --- /dev/null +++ b/checkpoint-145/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0867bb3588983088e1ae19ae31c54b18cd181442273a375c356a0362c53a9a5 +size 14308 diff --git a/checkpoint-145/scheduler.pt b/checkpoint-145/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5de74935c2aa899cd7b470bf2beeabc4a904ccb6 --- /dev/null +++ b/checkpoint-145/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62e2160d5bd28532b533e3efabec8ccad052eb12401ff324f1c912ac3e74c929 +size 1064 diff --git a/checkpoint-145/trainer_state.json b/checkpoint-145/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..440dbc35b221b2ca3edea38c4a749c90b6012b2b --- /dev/null +++ b/checkpoint-145/trainer_state.json @@ -0,0 +1,114 @@ +{ + "best_metric": 0.8010335917312662, + "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-121", + "epoch": 5.979381443298969, + "eval_steps": 500, + "global_step": 145, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9896907216494846, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, + "step": 24 + }, + { + "epoch": 1.9793814432989691, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, + "step": 48 + }, + { + "epoch": 2.9690721649484537, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, + "step": 72 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, + "step": 97 + }, + { + "epoch": 4.989690721649485, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, + "step": 121 + }, + { + "epoch": 5.979381443298969, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.7551329793037762, + "eval_loss": 0.5810549259185791, + "eval_precision": 0.7802262423287315, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6999, + "eval_samples_per_second": 227.658, + "eval_steps_per_second": 28.825, + "step": 145 + } + ], + "logging_steps": 500, + "max_steps": 360, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.10820802112e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-145/training_args.bin b/checkpoint-145/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b --- /dev/null +++ b/checkpoint-145/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a +size 5240 diff --git a/checkpoint-169/config.json b/checkpoint-169/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d08a8bd80e93984973b2e4be199a10eccbe7cae --- /dev/null +++ b/checkpoint-169/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "finetuning_task": "audio-classification", + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "1s_normal", + "1": "1s_pain", + "2": "1s_hunger", + "3": "1s_asphyxia" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "1s_asphyxia": 3, + "1s_hunger": 2, + "1s_normal": 0, + "1s_pain": 1 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.44.2", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/checkpoint-169/model.safetensors b/checkpoint-169/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15fce483a6cc42d80e533b5b7361947f4e53e32c --- /dev/null +++ b/checkpoint-169/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052c2fac14563eb5b0b53eb4a3f6509d46dcf61f99ea7c0e5e76df5345230eff +size 94765560 diff --git a/checkpoint-169/optimizer.pt b/checkpoint-169/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0daee7937b05f68590479fc2fc1c36f3c7297280 --- /dev/null +++ b/checkpoint-169/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:672db33956055547a36f8f888cdb0f8124de62210906cbe58b87898baf646fe9 +size 189556666 diff --git a/checkpoint-169/rng_state.pth b/checkpoint-169/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..227ad56e63341ddcfd2891326574cc0c15e6b468 --- /dev/null +++ b/checkpoint-169/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3d85d4b435d4ac4552bb46f32d5f63a55dbc65baaa5af6a14b6b39e968f4b8e +size 14308 diff --git a/checkpoint-169/scheduler.pt b/checkpoint-169/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..65d2d414fbaddb4941c4409c7132cceb94f9d706 --- /dev/null +++ b/checkpoint-169/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a897dacc1b915150336aff74c1f17b3b8504d76168703307724c32fe3bca6896 +size 1064 diff --git a/checkpoint-169/trainer_state.json b/checkpoint-169/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2b12ecec40756a90589561ec7fcbf996eef6b647 --- /dev/null +++ b/checkpoint-169/trainer_state.json @@ -0,0 +1,126 @@ +{ + "best_metric": 0.8397932816537468, + "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169", + "epoch": 6.969072164948454, + "eval_steps": 500, + "global_step": 169, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9896907216494846, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, + "step": 24 + }, + { + "epoch": 1.9793814432989691, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, + "step": 48 + }, + { + "epoch": 2.9690721649484537, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, + "step": 72 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, + "step": 97 + }, + { + "epoch": 4.989690721649485, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, + "step": 121 + }, + { + "epoch": 5.979381443298969, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.7551329793037762, + "eval_loss": 0.5810549259185791, + "eval_precision": 0.7802262423287315, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6999, + "eval_samples_per_second": 227.658, + "eval_steps_per_second": 28.825, + "step": 145 + }, + { + "epoch": 6.969072164948454, + "eval_accuracy": 0.8397932816537468, + "eval_f1": 0.8354840008265724, + "eval_loss": 0.44080850481987, + "eval_precision": 0.8365717854569443, + "eval_recall": 0.8397932816537468, + "eval_runtime": 1.6785, + "eval_samples_per_second": 230.56, + "eval_steps_per_second": 29.192, + "step": 169 + } + ], + "logging_steps": 500, + "max_steps": 360, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.45957602464e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-169/training_args.bin b/checkpoint-169/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b --- /dev/null +++ b/checkpoint-169/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a +size 5240 diff --git a/checkpoint-194/config.json b/checkpoint-194/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d08a8bd80e93984973b2e4be199a10eccbe7cae --- /dev/null +++ b/checkpoint-194/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "finetuning_task": "audio-classification", + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "1s_normal", + "1": "1s_pain", + "2": "1s_hunger", + "3": "1s_asphyxia" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "1s_asphyxia": 3, + "1s_hunger": 2, + "1s_normal": 0, + "1s_pain": 1 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.44.2", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/checkpoint-194/model.safetensors b/checkpoint-194/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fee158e749db398f71cd35dbfbf02b9eb93e6d40 --- /dev/null +++ b/checkpoint-194/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03ee65315496952ebb447ef8a2d41c18b1e5e1649887b43e9d670daab42cd79 +size 94765560 diff --git a/checkpoint-194/optimizer.pt b/checkpoint-194/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ca18e4b0664905deef61994692d9896f07abca2 --- /dev/null +++ b/checkpoint-194/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21b9a71b29339ce815e0272ac73c5e9382e8e4b94e000d00c2c15c46fcaf1bb1 +size 189556666 diff --git a/checkpoint-194/rng_state.pth b/checkpoint-194/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e8dfee6203431b1c5ac7be87a3e7e223f86001ab --- /dev/null +++ b/checkpoint-194/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a98371a8e546887d8a0cf63952365049758fe889acd42fc72eb734ef3af0332 +size 14308 diff --git a/checkpoint-194/scheduler.pt b/checkpoint-194/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..385ccbcb174b6c65ddc163e632a247972f5a6876 --- /dev/null +++ b/checkpoint-194/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf3f9f9e1b079187c143133062b7476177d05ae3b26e39797b7b8deffda481f +size 1064 diff --git a/checkpoint-194/trainer_state.json b/checkpoint-194/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0249b2921e7b7ccb35461b7a2f87856600583480 --- /dev/null +++ b/checkpoint-194/trainer_state.json @@ -0,0 +1,138 @@ +{ + "best_metric": 0.8397932816537468, + "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-169", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 194, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9896907216494846, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, + "step": 24 + }, + { + "epoch": 1.9793814432989691, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, + "step": 48 + }, + { + "epoch": 2.9690721649484537, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, + "step": 72 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, + "step": 97 + }, + { + "epoch": 4.989690721649485, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, + "step": 121 + }, + { + "epoch": 5.979381443298969, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.7551329793037762, + "eval_loss": 0.5810549259185791, + "eval_precision": 0.7802262423287315, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6999, + "eval_samples_per_second": 227.658, + "eval_steps_per_second": 28.825, + "step": 145 + }, + { + "epoch": 6.969072164948454, + "eval_accuracy": 0.8397932816537468, + "eval_f1": 0.8354840008265724, + "eval_loss": 0.44080850481987, + "eval_precision": 0.8365717854569443, + "eval_recall": 0.8397932816537468, + "eval_runtime": 1.6785, + "eval_samples_per_second": 230.56, + "eval_steps_per_second": 29.192, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8191214470284238, + "eval_f1": 0.8167837576120855, + "eval_loss": 0.46871018409729004, + "eval_precision": 0.8187643627479353, + "eval_recall": 0.8191214470284238, + "eval_runtime": 1.6719, + "eval_samples_per_second": 231.477, + "eval_steps_per_second": 29.308, + "step": 194 + } + ], + "logging_steps": 500, + "max_steps": 360, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.81094402816e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-194/training_args.bin b/checkpoint-194/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b --- /dev/null +++ b/checkpoint-194/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a +size 5240 diff --git a/checkpoint-218/config.json b/checkpoint-218/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d08a8bd80e93984973b2e4be199a10eccbe7cae --- /dev/null +++ b/checkpoint-218/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "finetuning_task": "audio-classification", + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "1s_normal", + "1": "1s_pain", + "2": "1s_hunger", + "3": "1s_asphyxia" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "1s_asphyxia": 3, + "1s_hunger": 2, + "1s_normal": 0, + "1s_pain": 1 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.44.2", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/checkpoint-218/model.safetensors b/checkpoint-218/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13ce4bdf09e2f9715c7eae62aab6e56acc214ad6 --- /dev/null +++ b/checkpoint-218/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f22baf06235c468452afb48784bcfe17b8d6b16547f92c897a6b83b9d4be1927 +size 94765560 diff --git a/checkpoint-218/optimizer.pt b/checkpoint-218/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..17124273ff79e335cd8279bb18bd3c4b9587a55e --- /dev/null +++ b/checkpoint-218/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b107c786ae699e053540620707226aced653806a534f7bf2c3d041d5a799d0d +size 189556666 diff --git a/checkpoint-218/rng_state.pth b/checkpoint-218/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d430fc02ebee6a20553d790dd1badb8940dbf287 --- /dev/null +++ b/checkpoint-218/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87f5981b8629d20da8d7bb452f1f8e311534ac9a1fbf9cf6ca9f99f281c14bfa +size 14308 diff --git a/checkpoint-218/scheduler.pt b/checkpoint-218/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..16a8840d7666ecfd9e80be6bcfb9029489e6a627 --- /dev/null +++ b/checkpoint-218/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51016c1626b13f8e0c2274df9c63eb74ef480d103cd1eaea425e0978c8cf392 +size 1064 diff --git a/checkpoint-218/trainer_state.json b/checkpoint-218/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..aead11538c48ffcb57c971db067aed4987b9b2a8 --- /dev/null +++ b/checkpoint-218/trainer_state.json @@ -0,0 +1,150 @@ +{ + "best_metric": 0.8475452196382429, + "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-218", + "epoch": 8.989690721649485, + "eval_steps": 500, + "global_step": 218, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9896907216494846, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, + "step": 24 + }, + { + "epoch": 1.9793814432989691, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, + "step": 48 + }, + { + "epoch": 2.9690721649484537, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, + "step": 72 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, + "step": 97 + }, + { + "epoch": 4.989690721649485, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, + "step": 121 + }, + { + "epoch": 5.979381443298969, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.7551329793037762, + "eval_loss": 0.5810549259185791, + "eval_precision": 0.7802262423287315, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6999, + "eval_samples_per_second": 227.658, + "eval_steps_per_second": 28.825, + "step": 145 + }, + { + "epoch": 6.969072164948454, + "eval_accuracy": 0.8397932816537468, + "eval_f1": 0.8354840008265724, + "eval_loss": 0.44080850481987, + "eval_precision": 0.8365717854569443, + "eval_recall": 0.8397932816537468, + "eval_runtime": 1.6785, + "eval_samples_per_second": 230.56, + "eval_steps_per_second": 29.192, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8191214470284238, + "eval_f1": 0.8167837576120855, + "eval_loss": 0.46871018409729004, + "eval_precision": 0.8187643627479353, + "eval_recall": 0.8191214470284238, + "eval_runtime": 1.6719, + "eval_samples_per_second": 231.477, + "eval_steps_per_second": 29.308, + "step": 194 + }, + { + "epoch": 8.989690721649485, + "eval_accuracy": 0.8475452196382429, + "eval_f1": 0.8473710740005564, + "eval_loss": 0.4363822937011719, + "eval_precision": 0.8483238707679635, + "eval_recall": 0.8475452196382429, + "eval_runtime": 1.6838, + "eval_samples_per_second": 229.835, + "eval_steps_per_second": 29.101, + "step": 218 + } + ], + "logging_steps": 500, + "max_steps": 360, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.16231203168e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-218/training_args.bin b/checkpoint-218/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b --- /dev/null +++ b/checkpoint-218/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a +size 5240 diff --git a/checkpoint-24/model.safetensors b/checkpoint-24/model.safetensors index 2cc85ea190641bef8b98152f2da3ca9bee426a80..ffcd51eabb3214e724dc64bf514a1004f7e05c9b 100644 --- a/checkpoint-24/model.safetensors +++ b/checkpoint-24/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68a1c8e7483216c3afd74e7c8f3bf326f08720513511c39fcdf37be7ae3a0552 +oid sha256:2a3bee6833ba8b12da8d51850147bb3c3b153eec346dfd3b0fda4ff730aa9026 size 94765560 diff --git a/checkpoint-24/optimizer.pt b/checkpoint-24/optimizer.pt index db5a00561c843d9258395ddb3e3c0b7b72fc9d46..6ad6e1857ea6c9ac822c7d44849e96bcb0eba615 100644 --- a/checkpoint-24/optimizer.pt +++ b/checkpoint-24/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea879235e07d8970f1c22eb53e2c7884154e7269b0995315fa875740c3c99644 +oid sha256:856d9a47a6db002c6e55d97a99db231cee10d1fef5c0909f7d527b86c30af746 size 189556666 diff --git a/checkpoint-24/rng_state.pth b/checkpoint-24/rng_state.pth index e15ab93a2b7adc6811c77cbda4f1fd8fd57211fa..3f4c7eea6d1ab45f43fd742de99363e16c650680 100644 --- a/checkpoint-24/rng_state.pth +++ b/checkpoint-24/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1dfbead32b1c4ff8d1271c1899bd626bf08d46cbc21d2769431f03d7912b1368 +oid sha256:202bb63ca18cd0b022bafcf6cf19faa822d2e1dbc2cdaf6b5a9bdcbe21ca7562 size 14308 diff --git a/checkpoint-24/scheduler.pt b/checkpoint-24/scheduler.pt index f8681feeb0e8c9b2d538f351ccebbdbb2c780bba..ecff807dd131006fd5e878baa94fe3be944b2771 100644 --- a/checkpoint-24/scheduler.pt +++ b/checkpoint-24/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3b5edb3a337b55456e10de8e5d582a8acbe41a6ddc5e0ffb6c3463e1e8bd89e +oid sha256:7a45410ab60db58a246b1af794984e39ea7655e4cee627c4cdfe5bd40727f72a size 1064 diff --git a/checkpoint-24/trainer_state.json b/checkpoint-24/trainer_state.json index 4dd062a8c9ea57f0299cf9cfb9b0226ceedd3b59..a7de1ad6a2355a89c71d32c55e5f16ebb263016f 100644 --- a/checkpoint-24/trainer_state.json +++ b/checkpoint-24/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 0.5193798449612403, + "best_metric": 0.6356589147286822, "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-24", "epoch": 0.9896907216494846, "eval_steps": 500, @@ -10,21 +10,21 @@ "log_history": [ { "epoch": 0.9896907216494846, - "eval_accuracy": 0.5193798449612403, - "eval_f1": 0.40212917110679286, - "eval_loss": 1.1225358247756958, - "eval_precision": 0.33630594789914475, - "eval_recall": 0.5193798449612403, - "eval_runtime": 1.6647, - "eval_samples_per_second": 232.47, - "eval_steps_per_second": 29.434, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, "step": 24 } ], "logging_steps": 500, - "max_steps": 24, + "max_steps": 360, "num_input_tokens_seen": 0, - "num_train_epochs": 1, + "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { @@ -42,12 +42,12 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": true + "should_training_stop": false }, "attributes": {} } }, - "total_flos": 3493211996160000.0, + "total_flos": 3513680035200000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null diff --git a/checkpoint-24/training_args.bin b/checkpoint-24/training_args.bin index 1c687097901193e009172e38809112a91f02e766..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b 100644 --- a/checkpoint-24/training_args.bin +++ b/checkpoint-24/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c712b343688a0deff84bdfb1bfec65a5350ebb13af4be1817584730c46e10303 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a size 5240 diff --git a/checkpoint-242/config.json b/checkpoint-242/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d08a8bd80e93984973b2e4be199a10eccbe7cae --- /dev/null +++ b/checkpoint-242/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "finetuning_task": "audio-classification", + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "1s_normal", + "1": "1s_pain", + "2": "1s_hunger", + "3": "1s_asphyxia" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "1s_asphyxia": 3, + "1s_hunger": 2, + "1s_normal": 0, + "1s_pain": 1 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.44.2", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/checkpoint-242/model.safetensors b/checkpoint-242/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0041827f52aa85a36b9347f03f59e15ddaf1e4d8 --- /dev/null +++ b/checkpoint-242/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c3c3acfb39a1584bdad8c4452c6a99cc55d03b67d4754ef55bdc31be94eb5b7 +size 94765560 diff --git a/checkpoint-242/optimizer.pt b/checkpoint-242/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..79fb0e3ece1badee585e4ce1cc78a265ddadcd62 --- /dev/null +++ b/checkpoint-242/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529f05dfd1fdfa07d440221559587dda8eded6dd4e7f89fd777f0ac7f24f5fee +size 189556666 diff --git a/checkpoint-242/rng_state.pth b/checkpoint-242/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b7f7a3447b23568ff182a14cf9d921884fbb1158 --- /dev/null +++ b/checkpoint-242/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e731e4993f8e25825c268c79f8c5e714e7bddf10c95d3c48fd5bec091ada0032 +size 14308 diff --git a/checkpoint-242/scheduler.pt b/checkpoint-242/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..faccd17d26aa0766553e8b5cd550f06b7cd21d2a --- /dev/null +++ b/checkpoint-242/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781bb61f6baa40042311c990cc82713d83cdc179ccb43dbb9cdd148961a2e8ad +size 1064 diff --git a/checkpoint-242/trainer_state.json b/checkpoint-242/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..801c2d99878ef26af3d55b5c807f59c24b89c155 --- /dev/null +++ b/checkpoint-242/trainer_state.json @@ -0,0 +1,162 @@ +{ + "best_metric": 0.8578811369509044, + "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242", + "epoch": 9.97938144329897, + "eval_steps": 500, + "global_step": 242, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9896907216494846, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, + "step": 24 + }, + { + "epoch": 1.9793814432989691, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, + "step": 48 + }, + { + "epoch": 2.9690721649484537, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, + "step": 72 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, + "step": 97 + }, + { + "epoch": 4.989690721649485, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, + "step": 121 + }, + { + "epoch": 5.979381443298969, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.7551329793037762, + "eval_loss": 0.5810549259185791, + "eval_precision": 0.7802262423287315, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6999, + "eval_samples_per_second": 227.658, + "eval_steps_per_second": 28.825, + "step": 145 + }, + { + "epoch": 6.969072164948454, + "eval_accuracy": 0.8397932816537468, + "eval_f1": 0.8354840008265724, + "eval_loss": 0.44080850481987, + "eval_precision": 0.8365717854569443, + "eval_recall": 0.8397932816537468, + "eval_runtime": 1.6785, + "eval_samples_per_second": 230.56, + "eval_steps_per_second": 29.192, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8191214470284238, + "eval_f1": 0.8167837576120855, + "eval_loss": 0.46871018409729004, + "eval_precision": 0.8187643627479353, + "eval_recall": 0.8191214470284238, + "eval_runtime": 1.6719, + "eval_samples_per_second": 231.477, + "eval_steps_per_second": 29.308, + "step": 194 + }, + { + "epoch": 8.989690721649485, + "eval_accuracy": 0.8475452196382429, + "eval_f1": 0.8473710740005564, + "eval_loss": 0.4363822937011719, + "eval_precision": 0.8483238707679635, + "eval_recall": 0.8475452196382429, + "eval_runtime": 1.6838, + "eval_samples_per_second": 229.835, + "eval_steps_per_second": 29.101, + "step": 218 + }, + { + "epoch": 9.97938144329897, + "eval_accuracy": 0.8578811369509044, + "eval_f1": 0.8567532661685897, + "eval_loss": 0.42906680703163147, + "eval_precision": 0.8560504853170988, + "eval_recall": 0.8578811369509044, + "eval_runtime": 1.6875, + "eval_samples_per_second": 229.333, + "eval_steps_per_second": 29.037, + "step": 242 + } + ], + "logging_steps": 500, + "max_steps": 360, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.5136800352e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-242/training_args.bin b/checkpoint-242/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b --- /dev/null +++ b/checkpoint-242/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a +size 5240 diff --git a/checkpoint-266/config.json b/checkpoint-266/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d08a8bd80e93984973b2e4be199a10eccbe7cae --- /dev/null +++ b/checkpoint-266/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "finetuning_task": "audio-classification", + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "1s_normal", + "1": "1s_pain", + "2": "1s_hunger", + "3": "1s_asphyxia" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "1s_asphyxia": 3, + "1s_hunger": 2, + "1s_normal": 0, + "1s_pain": 1 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.44.2", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/checkpoint-266/model.safetensors b/checkpoint-266/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07d73e4395e9cc1d358b6a23ed2fecc1d55fe7c4 --- /dev/null +++ b/checkpoint-266/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40af64761e85d5fea70c649217c8cc140a31cb9ca6b3839f952a2bb9f87e0b2 +size 94765560 diff --git a/checkpoint-266/optimizer.pt b/checkpoint-266/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba21d918eee493f20f66bf6c5f6853394ceb9235 --- /dev/null +++ b/checkpoint-266/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2157435343c02544baabe61713043b7b3749a01b1cc4aeeb2340f89faf040460 +size 189556666 diff --git a/checkpoint-266/rng_state.pth b/checkpoint-266/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b5fc83461b8c9cbc3968a9953e468a35b42e557 --- /dev/null +++ b/checkpoint-266/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73b50615a7bf1da5d9ef8fc068bd4b47076e83ae38947983396a5f0ea07c1805 +size 14308 diff --git a/checkpoint-266/scheduler.pt b/checkpoint-266/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7b1eb99a3029972a14aafefa9216c42f3b5ae05 --- /dev/null +++ b/checkpoint-266/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:431e6d6d13cfa84e3db2d479446a897f9f93f4718bfe0ac4b68d73256eda8d91 +size 1064 diff --git a/checkpoint-266/trainer_state.json b/checkpoint-266/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eca4f36b5ef20f485eb8556f9ad2df18b68193bf --- /dev/null +++ b/checkpoint-266/trainer_state.json @@ -0,0 +1,174 @@ +{ + "best_metric": 0.8578811369509044, + "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242", + "epoch": 10.969072164948454, + "eval_steps": 500, + "global_step": 266, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9896907216494846, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, + "step": 24 + }, + { + "epoch": 1.9793814432989691, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, + "step": 48 + }, + { + "epoch": 2.9690721649484537, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, + "step": 72 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, + "step": 97 + }, + { + "epoch": 4.989690721649485, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, + "step": 121 + }, + { + "epoch": 5.979381443298969, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.7551329793037762, + "eval_loss": 0.5810549259185791, + "eval_precision": 0.7802262423287315, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6999, + "eval_samples_per_second": 227.658, + "eval_steps_per_second": 28.825, + "step": 145 + }, + { + "epoch": 6.969072164948454, + "eval_accuracy": 0.8397932816537468, + "eval_f1": 0.8354840008265724, + "eval_loss": 0.44080850481987, + "eval_precision": 0.8365717854569443, + "eval_recall": 0.8397932816537468, + "eval_runtime": 1.6785, + "eval_samples_per_second": 230.56, + "eval_steps_per_second": 29.192, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8191214470284238, + "eval_f1": 0.8167837576120855, + "eval_loss": 0.46871018409729004, + "eval_precision": 0.8187643627479353, + "eval_recall": 0.8191214470284238, + "eval_runtime": 1.6719, + "eval_samples_per_second": 231.477, + "eval_steps_per_second": 29.308, + "step": 194 + }, + { + "epoch": 8.989690721649485, + "eval_accuracy": 0.8475452196382429, + "eval_f1": 0.8473710740005564, + "eval_loss": 0.4363822937011719, + "eval_precision": 0.8483238707679635, + "eval_recall": 0.8475452196382429, + "eval_runtime": 1.6838, + "eval_samples_per_second": 229.835, + "eval_steps_per_second": 29.101, + "step": 218 + }, + { + "epoch": 9.97938144329897, + "eval_accuracy": 0.8578811369509044, + "eval_f1": 0.8567532661685897, + "eval_loss": 0.42906680703163147, + "eval_precision": 0.8560504853170988, + "eval_recall": 0.8578811369509044, + "eval_runtime": 1.6875, + "eval_samples_per_second": 229.333, + "eval_steps_per_second": 29.037, + "step": 242 + }, + { + "epoch": 10.969072164948454, + "eval_accuracy": 0.8501291989664083, + "eval_f1": 0.8527674409653727, + "eval_loss": 0.46987947821617126, + "eval_precision": 0.858217159873796, + "eval_recall": 0.8501291989664083, + "eval_runtime": 1.6795, + "eval_samples_per_second": 230.425, + "eval_steps_per_second": 29.175, + "step": 266 + } + ], + "logging_steps": 500, + "max_steps": 360, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.86504803872e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-266/training_args.bin b/checkpoint-266/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b --- /dev/null +++ b/checkpoint-266/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a +size 5240 diff --git a/checkpoint-291/config.json b/checkpoint-291/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d08a8bd80e93984973b2e4be199a10eccbe7cae --- /dev/null +++ b/checkpoint-291/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "finetuning_task": "audio-classification", + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "1s_normal", + "1": "1s_pain", + "2": "1s_hunger", + "3": "1s_asphyxia" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "1s_asphyxia": 3, + "1s_hunger": 2, + "1s_normal": 0, + "1s_pain": 1 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.44.2", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/checkpoint-291/model.safetensors b/checkpoint-291/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bcddfdca54e821d99b50240e69a342acc39e58f --- /dev/null +++ b/checkpoint-291/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47d00b50afe8f783e4bf6f910a770b985446e9c936205d750ce95a2c463cbf3 +size 94765560 diff --git a/checkpoint-291/optimizer.pt b/checkpoint-291/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9230c36ee52a86ff02bf2e5bae31d6a33a931c3f --- /dev/null +++ b/checkpoint-291/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c1f039c3b8800766242328eaa5d76f19b0a28b425876e5045d7114823501fe +size 189556666 diff --git a/checkpoint-291/rng_state.pth b/checkpoint-291/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0efe295a827e11251738e66697794e22b010d43b --- /dev/null +++ b/checkpoint-291/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd40f07c2376dd9f531852b4c390736ede72c8749139c12e76d1f290940cff3a +size 14308 diff --git a/checkpoint-291/scheduler.pt b/checkpoint-291/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7709bdb805bd951b521a884882c1b2b7220a208 --- /dev/null +++ b/checkpoint-291/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4fa6a80ced34d80d85c2cda8f22c82979b4cdae444e81fcc24e1cd53d6a4cf2 +size 1064 diff --git a/checkpoint-291/trainer_state.json b/checkpoint-291/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..31f2d2ec9dc4b9e372a88c4c20a316c68960a763 --- /dev/null +++ b/checkpoint-291/trainer_state.json @@ -0,0 +1,186 @@ +{ + "best_metric": 0.8578811369509044, + "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242", + "epoch": 12.0, + "eval_steps": 500, + "global_step": 291, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9896907216494846, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, + "step": 24 + }, + { + "epoch": 1.9793814432989691, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, + "step": 48 + }, + { + "epoch": 2.9690721649484537, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, + "step": 72 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, + "step": 97 + }, + { + "epoch": 4.989690721649485, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, + "step": 121 + }, + { + "epoch": 5.979381443298969, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.7551329793037762, + "eval_loss": 0.5810549259185791, + "eval_precision": 0.7802262423287315, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6999, + "eval_samples_per_second": 227.658, + "eval_steps_per_second": 28.825, + "step": 145 + }, + { + "epoch": 6.969072164948454, + "eval_accuracy": 0.8397932816537468, + "eval_f1": 0.8354840008265724, + "eval_loss": 0.44080850481987, + "eval_precision": 0.8365717854569443, + "eval_recall": 0.8397932816537468, + "eval_runtime": 1.6785, + "eval_samples_per_second": 230.56, + "eval_steps_per_second": 29.192, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8191214470284238, + "eval_f1": 0.8167837576120855, + "eval_loss": 0.46871018409729004, + "eval_precision": 0.8187643627479353, + "eval_recall": 0.8191214470284238, + "eval_runtime": 1.6719, + "eval_samples_per_second": 231.477, + "eval_steps_per_second": 29.308, + "step": 194 + }, + { + "epoch": 8.989690721649485, + "eval_accuracy": 0.8475452196382429, + "eval_f1": 0.8473710740005564, + "eval_loss": 0.4363822937011719, + "eval_precision": 0.8483238707679635, + "eval_recall": 0.8475452196382429, + "eval_runtime": 1.6838, + "eval_samples_per_second": 229.835, + "eval_steps_per_second": 29.101, + "step": 218 + }, + { + "epoch": 9.97938144329897, + "eval_accuracy": 0.8578811369509044, + "eval_f1": 0.8567532661685897, + "eval_loss": 0.42906680703163147, + "eval_precision": 0.8560504853170988, + "eval_recall": 0.8578811369509044, + "eval_runtime": 1.6875, + "eval_samples_per_second": 229.333, + "eval_steps_per_second": 29.037, + "step": 242 + }, + { + "epoch": 10.969072164948454, + "eval_accuracy": 0.8501291989664083, + "eval_f1": 0.8527674409653727, + "eval_loss": 0.46987947821617126, + "eval_precision": 0.858217159873796, + "eval_recall": 0.8501291989664083, + "eval_runtime": 1.6795, + "eval_samples_per_second": 230.425, + "eval_steps_per_second": 29.175, + "step": 266 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8449612403100775, + "eval_f1": 0.8479775479649662, + "eval_loss": 0.48616188764572144, + "eval_precision": 0.8536273463382107, + "eval_recall": 0.8449612403100775, + "eval_runtime": 1.6948, + "eval_samples_per_second": 228.339, + "eval_steps_per_second": 28.911, + "step": 291 + } + ], + "logging_steps": 500, + "max_steps": 360, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.21641604224e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-291/training_args.bin b/checkpoint-291/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b --- /dev/null +++ b/checkpoint-291/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a +size 5240 diff --git a/checkpoint-315/config.json b/checkpoint-315/config.json new file mode 100644 index 0000000000000000000000000000000000000000..4d08a8bd80e93984973b2e4be199a10eccbe7cae --- /dev/null +++ b/checkpoint-315/config.json @@ -0,0 +1,85 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "finetuning_task": "audio-classification", + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "1s_normal", + "1": "1s_pain", + "2": "1s_hunger", + "3": "1s_asphyxia" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "1s_asphyxia": 3, + "1s_hunger": 2, + "1s_normal": 0, + "1s_pain": 1 + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.44.2", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/checkpoint-315/model.safetensors b/checkpoint-315/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6a63ad1353d2ace718d340dbf3137e721e2cac9 --- /dev/null +++ b/checkpoint-315/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1700c80d575b455dce88b4e3c764090727ae02d64f4125396c1001f9bd02bd57 +size 94765560 diff --git a/checkpoint-315/optimizer.pt b/checkpoint-315/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..08e1c4692e11e6a5925040b107998bdee8a896b0 --- /dev/null +++ b/checkpoint-315/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fca7dfcd62a0d46fcb8d64a5382493902a875243c0bf05e743cbde3de662707e +size 189556666 diff --git a/checkpoint-315/rng_state.pth b/checkpoint-315/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f9ad45a99c9f7e37ab15211ea53cf17239185318 --- /dev/null +++ b/checkpoint-315/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e8cd9244ffbcc048ec9f328b6b90e20f393979c69ae6a39c033f1771f269e4 +size 14308 diff --git a/checkpoint-315/scheduler.pt b/checkpoint-315/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..417daef62ccf256c321c79c09e5beb05f8bc7968 --- /dev/null +++ b/checkpoint-315/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:956285c3102bd85589a4c94d7acca82629b8e4407131906dba6bfe9c2568df92 +size 1064 diff --git a/checkpoint-315/trainer_state.json b/checkpoint-315/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d710c1aaab04bdafc61441c2dd8c97d00cae562b --- /dev/null +++ b/checkpoint-315/trainer_state.json @@ -0,0 +1,198 @@ +{ + "best_metric": 0.8578811369509044, + "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-242", + "epoch": 12.989690721649485, + "eval_steps": 500, + "global_step": 315, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.9896907216494846, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, + "step": 24 + }, + { + "epoch": 1.9793814432989691, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, + "step": 48 + }, + { + "epoch": 2.9690721649484537, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, + "step": 72 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, + "step": 97 + }, + { + "epoch": 4.989690721649485, + "eval_accuracy": 0.8010335917312662, + "eval_f1": 0.7974946178390901, + "eval_loss": 0.5268548130989075, + "eval_precision": 0.8005965453214461, + "eval_recall": 0.8010335917312662, + "eval_runtime": 1.6829, + "eval_samples_per_second": 229.957, + "eval_steps_per_second": 29.116, + "step": 121 + }, + { + "epoch": 5.979381443298969, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.7551329793037762, + "eval_loss": 0.5810549259185791, + "eval_precision": 0.7802262423287315, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6999, + "eval_samples_per_second": 227.658, + "eval_steps_per_second": 28.825, + "step": 145 + }, + { + "epoch": 6.969072164948454, + "eval_accuracy": 0.8397932816537468, + "eval_f1": 0.8354840008265724, + "eval_loss": 0.44080850481987, + "eval_precision": 0.8365717854569443, + "eval_recall": 0.8397932816537468, + "eval_runtime": 1.6785, + "eval_samples_per_second": 230.56, + "eval_steps_per_second": 29.192, + "step": 169 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8191214470284238, + "eval_f1": 0.8167837576120855, + "eval_loss": 0.46871018409729004, + "eval_precision": 0.8187643627479353, + "eval_recall": 0.8191214470284238, + "eval_runtime": 1.6719, + "eval_samples_per_second": 231.477, + "eval_steps_per_second": 29.308, + "step": 194 + }, + { + "epoch": 8.989690721649485, + "eval_accuracy": 0.8475452196382429, + "eval_f1": 0.8473710740005564, + "eval_loss": 0.4363822937011719, + "eval_precision": 0.8483238707679635, + "eval_recall": 0.8475452196382429, + "eval_runtime": 1.6838, + "eval_samples_per_second": 229.835, + "eval_steps_per_second": 29.101, + "step": 218 + }, + { + "epoch": 9.97938144329897, + "eval_accuracy": 0.8578811369509044, + "eval_f1": 0.8567532661685897, + "eval_loss": 0.42906680703163147, + "eval_precision": 0.8560504853170988, + "eval_recall": 0.8578811369509044, + "eval_runtime": 1.6875, + "eval_samples_per_second": 229.333, + "eval_steps_per_second": 29.037, + "step": 242 + }, + { + "epoch": 10.969072164948454, + "eval_accuracy": 0.8501291989664083, + "eval_f1": 0.8527674409653727, + "eval_loss": 0.46987947821617126, + "eval_precision": 0.858217159873796, + "eval_recall": 0.8501291989664083, + "eval_runtime": 1.6795, + "eval_samples_per_second": 230.425, + "eval_steps_per_second": 29.175, + "step": 266 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.8449612403100775, + "eval_f1": 0.8479775479649662, + "eval_loss": 0.48616188764572144, + "eval_precision": 0.8536273463382107, + "eval_recall": 0.8449612403100775, + "eval_runtime": 1.6948, + "eval_samples_per_second": 228.339, + "eval_steps_per_second": 28.911, + "step": 291 + }, + { + "epoch": 12.989690721649485, + "eval_accuracy": 0.8475452196382429, + "eval_f1": 0.8483528512434715, + "eval_loss": 0.4765341877937317, + "eval_precision": 0.8497431189750078, + "eval_recall": 0.8475452196382429, + "eval_runtime": 1.688, + "eval_samples_per_second": 229.264, + "eval_steps_per_second": 29.028, + "step": 315 + } + ], + "logging_steps": 500, + "max_steps": 360, + "num_input_tokens_seen": 0, + "num_train_epochs": 15, + "save_steps": 500, + "stateful_callbacks": { + "EarlyStoppingCallback": { + "args": { + "early_stopping_patience": 3, + "early_stopping_threshold": 0.0 + }, + "attributes": { + "early_stopping_patience_counter": 0 + } + }, + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.56778404576e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-315/training_args.bin b/checkpoint-315/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b --- /dev/null +++ b/checkpoint-315/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a +size 5240 diff --git a/checkpoint-48/model.safetensors b/checkpoint-48/model.safetensors index 0e6c8c07900c4578273e75afbae440116b3762cc..20498fe6d84fb8ca8a4d7a68edfd0348d564a227 100644 --- a/checkpoint-48/model.safetensors +++ b/checkpoint-48/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b70579301bd070215facd604eef3fa6085c3d36b83a56b214fd361987a8b0a0b +oid sha256:874cfd79e7f8f79117c607a043371e2575cea36eabf3fab57dfe6214765f959e size 94765560 diff --git a/checkpoint-48/optimizer.pt b/checkpoint-48/optimizer.pt index 9daa0248beaa6d5803071488502a0da86d9d310b..e4e96a610b834cb7257fbca00334d1b4348e9185 100644 --- a/checkpoint-48/optimizer.pt +++ b/checkpoint-48/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b36c98b895fb661cc797380b744ef81a3388846135aec85c75e9110fc5ca2c05 +oid sha256:d792dfa624618623c122f4b38eba2876c358b26c734e775720bedeee75004233 size 189556666 diff --git a/checkpoint-48/scheduler.pt b/checkpoint-48/scheduler.pt index 03a1b943a00ed16e6f322c21189180347b45a17a..9f5b55d6fc0d66fc26089eb16e987bdc26e2e10b 100644 --- a/checkpoint-48/scheduler.pt +++ b/checkpoint-48/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dbf38641e9fb31561b293062c60265089034749c9b71d81c07e3d707dcb1ab51 +oid sha256:ee8b157b5f1b2ae4a4ed28de279e432d640c2b1a56a597116f40e8a0eb7df070 size 1064 diff --git a/checkpoint-48/trainer_state.json b/checkpoint-48/trainer_state.json index d67e0f3ca64729fa303e9afcd2670318c593656b..6e4a0ac67a528ff603ef2480f9f63b866a3d8aca 100644 --- a/checkpoint-48/trainer_state.json +++ b/checkpoint-48/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 0.6795865633074936, + "best_metric": 0.7157622739018088, "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-48", "epoch": 1.9793814432989691, "eval_steps": 500, @@ -10,33 +10,33 @@ "log_history": [ { "epoch": 0.9896907216494846, - "eval_accuracy": 0.6098191214470284, - "eval_f1": 0.5527821682108387, - "eval_loss": 1.0193405151367188, - "eval_precision": 0.7003292379638224, - "eval_recall": 0.6098191214470284, - "eval_runtime": 1.7319, - "eval_samples_per_second": 223.453, - "eval_steps_per_second": 28.293, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, "step": 24 }, { "epoch": 1.9793814432989691, - "eval_accuracy": 0.6795865633074936, - "eval_f1": 0.6650515718181741, - "eval_loss": 0.7780925631523132, - "eval_precision": 0.6877908327609833, - "eval_recall": 0.6795865633074936, - "eval_runtime": 1.7105, - "eval_samples_per_second": 226.247, - "eval_steps_per_second": 28.646, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, "step": 48 } ], "logging_steps": 500, - "max_steps": 144, + "max_steps": 360, "num_input_tokens_seen": 0, - "num_train_epochs": 6, + "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-48/training_args.bin b/checkpoint-48/training_args.bin index 2523e8b7f8fecd9f3cea42f185f2da0360caee05..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b 100644 --- a/checkpoint-48/training_args.bin +++ b/checkpoint-48/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f758b94689e09a814f1948cbbce56e483b2b53509b111e749d1961a9e2d3ad45 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a size 5240 diff --git a/checkpoint-72/model.safetensors b/checkpoint-72/model.safetensors index 0890f74dcfe427d4cbf6893a05da7b738a7698b0..271923b7cf9bf3242df0aecae477dba228179fda 100644 --- a/checkpoint-72/model.safetensors +++ b/checkpoint-72/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a3c6d8a9bb511a47d9f8c8d94ca004e0d3631edd77a234c31b38772da2cbfab0 +oid sha256:7319a5906007abccfee6b3903d9644c4f1555218c42143551e3fdc5ddafdaeff size 94765560 diff --git a/checkpoint-72/optimizer.pt b/checkpoint-72/optimizer.pt index ed7358647a39c51a218444c3a1bfb60ddae63c63..de1ecf9b8c0a9324ef5cc069190ba31f0e39c81c 100644 --- a/checkpoint-72/optimizer.pt +++ b/checkpoint-72/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8a01477828151d9302ffb883445b27ed488c6fab05edef4659327e7b85b82a57 +oid sha256:07d214f9dd1fc10263bc5278f794a09463e71a214c7c58fea77ceb2220d3d450 size 189556666 diff --git a/checkpoint-72/scheduler.pt b/checkpoint-72/scheduler.pt index 0a41468999f14bccc3e83219639ebbf2c3160fe3..115cbbf4e2489019ed0d6b5304279798721f4896 100644 --- a/checkpoint-72/scheduler.pt +++ b/checkpoint-72/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:395fbbe365895cda4ce756be4692252071ddfad60f8f48069469260e726f6b62 +oid sha256:ee44eb10c5d626424dbeae30798ab223335ff2f90371a9f029e92a7d6ba8ece6 size 1064 diff --git a/checkpoint-72/trainer_state.json b/checkpoint-72/trainer_state.json index e74dd7925aac8be5cbdd8f83bd587957779f520c..6ab48df1c49dbbd9c7f8a18024aefd75423db344 100644 --- a/checkpoint-72/trainer_state.json +++ b/checkpoint-72/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 0.7260981912144703, + "best_metric": 0.7493540051679587, "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-72", "epoch": 2.9690721649484537, "eval_steps": 500, @@ -10,45 +10,45 @@ "log_history": [ { "epoch": 0.9896907216494846, - "eval_accuracy": 0.6098191214470284, - "eval_f1": 0.5527821682108387, - "eval_loss": 1.0193405151367188, - "eval_precision": 0.7003292379638224, - "eval_recall": 0.6098191214470284, - "eval_runtime": 1.7319, - "eval_samples_per_second": 223.453, - "eval_steps_per_second": 28.293, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, "step": 24 }, { "epoch": 1.9793814432989691, - "eval_accuracy": 0.6795865633074936, - "eval_f1": 0.6650515718181741, - "eval_loss": 0.7780925631523132, - "eval_precision": 0.6877908327609833, - "eval_recall": 0.6795865633074936, - "eval_runtime": 1.7105, - "eval_samples_per_second": 226.247, - "eval_steps_per_second": 28.646, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, "step": 48 }, { "epoch": 2.9690721649484537, - "eval_accuracy": 0.7260981912144703, - "eval_f1": 0.7125387559196679, - "eval_loss": 0.663092315196991, - "eval_precision": 0.7236217153897081, - "eval_recall": 0.7260981912144703, - "eval_runtime": 1.7131, - "eval_samples_per_second": 225.911, - "eval_steps_per_second": 28.604, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, "step": 72 } ], "logging_steps": 500, - "max_steps": 144, + "max_steps": 360, "num_input_tokens_seen": 0, - "num_train_epochs": 6, + "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-72/training_args.bin b/checkpoint-72/training_args.bin index 2523e8b7f8fecd9f3cea42f185f2da0360caee05..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b 100644 --- a/checkpoint-72/training_args.bin +++ b/checkpoint-72/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f758b94689e09a814f1948cbbce56e483b2b53509b111e749d1961a9e2d3ad45 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a size 5240 diff --git a/checkpoint-97/model.safetensors b/checkpoint-97/model.safetensors index dbe646655262dd95be2e72abaf9c10cc270bb21e..0bf1c336648818fb51a4ab6d5aa24e98ee8bed64 100644 --- a/checkpoint-97/model.safetensors +++ b/checkpoint-97/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4eb4411294ffdebd3325f2c2a9fe8bb8da45ecdf21e27ea71487650e32342e3f +oid sha256:c8acff5615edba2b47fa24da4e5111226931a4f7f775b8443933bf1c1b200335 size 94765560 diff --git a/checkpoint-97/optimizer.pt b/checkpoint-97/optimizer.pt index 357ec891ddc76898525937cdc42f9c78819c5024..35a430dc7f2d62a87af836be4601c6becb49935c 100644 --- a/checkpoint-97/optimizer.pt +++ b/checkpoint-97/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6c079ffaa2b8e6e0267bd097fcdd02618cec3d1413fc09b575441e5c90b1bc5e +oid sha256:52085d7c99568c3e2c34ca5fececf157702a633d83d4c7f211d0c308def9c024 size 189556666 diff --git a/checkpoint-97/scheduler.pt b/checkpoint-97/scheduler.pt index 360f3b71ab031191eae32fb5a7fd79fe59f2db1b..7cf43d8d7f6a3bbbc42d3c25e148cfdfb826db1e 100644 --- a/checkpoint-97/scheduler.pt +++ b/checkpoint-97/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25774c9bc4970b842f1daad063c77fc68915a49c8d8ccb103e1082b3b05e435f +oid sha256:bfb2ee1b3c00531eb539efa70cc744774aea554bf4f4128c6f6edb7b1470f35d size 1064 diff --git a/checkpoint-97/trainer_state.json b/checkpoint-97/trainer_state.json index 944c5d3496ea53c59d2a73e5d683018a2bec0ffb..36bec18dd878abab489b1bc852c1a299d6cfc2e9 100644 --- a/checkpoint-97/trainer_state.json +++ b/checkpoint-97/trainer_state.json @@ -1,5 +1,5 @@ { - "best_metric": 0.7493540051679587, + "best_metric": 0.7984496124031008, "best_model_checkpoint": "distilhubert-finetuned-mixed-data/checkpoint-97", "epoch": 4.0, "eval_steps": 500, @@ -10,57 +10,57 @@ "log_history": [ { "epoch": 0.9896907216494846, - "eval_accuracy": 0.6098191214470284, - "eval_f1": 0.5527821682108387, - "eval_loss": 1.0193405151367188, - "eval_precision": 0.7003292379638224, - "eval_recall": 0.6098191214470284, - "eval_runtime": 1.7319, - "eval_samples_per_second": 223.453, - "eval_steps_per_second": 28.293, + "eval_accuracy": 0.6356589147286822, + "eval_f1": 0.5920563016978556, + "eval_loss": 0.980873703956604, + "eval_precision": 0.5920482291587493, + "eval_recall": 0.6356589147286822, + "eval_runtime": 1.6668, + "eval_samples_per_second": 232.175, + "eval_steps_per_second": 29.397, "step": 24 }, { "epoch": 1.9793814432989691, - "eval_accuracy": 0.6795865633074936, - "eval_f1": 0.6650515718181741, - "eval_loss": 0.7780925631523132, - "eval_precision": 0.6877908327609833, - "eval_recall": 0.6795865633074936, - "eval_runtime": 1.7105, - "eval_samples_per_second": 226.247, - "eval_steps_per_second": 28.646, + "eval_accuracy": 0.7157622739018088, + "eval_f1": 0.6905410405322238, + "eval_loss": 0.7444477081298828, + "eval_precision": 0.6992377248989063, + "eval_recall": 0.7157622739018088, + "eval_runtime": 1.6941, + "eval_samples_per_second": 228.443, + "eval_steps_per_second": 28.924, "step": 48 }, { "epoch": 2.9690721649484537, - "eval_accuracy": 0.7260981912144703, - "eval_f1": 0.7125387559196679, - "eval_loss": 0.663092315196991, - "eval_precision": 0.7236217153897081, - "eval_recall": 0.7260981912144703, - "eval_runtime": 1.7131, - "eval_samples_per_second": 225.911, - "eval_steps_per_second": 28.604, + "eval_accuracy": 0.7493540051679587, + "eval_f1": 0.744898505571463, + "eval_loss": 0.6171658039093018, + "eval_precision": 0.7437592422989429, + "eval_recall": 0.7493540051679587, + "eval_runtime": 1.6943, + "eval_samples_per_second": 228.408, + "eval_steps_per_second": 28.92, "step": 72 }, { "epoch": 4.0, - "eval_accuracy": 0.7493540051679587, - "eval_f1": 0.7323843170610034, - "eval_loss": 0.5870956778526306, - "eval_precision": 0.7505520883603423, - "eval_recall": 0.7493540051679587, - "eval_runtime": 1.7332, - "eval_samples_per_second": 223.293, - "eval_steps_per_second": 28.272, + "eval_accuracy": 0.7984496124031008, + "eval_f1": 0.7873621619744228, + "eval_loss": 0.5430988073348999, + "eval_precision": 0.79180344284319, + "eval_recall": 0.7984496124031008, + "eval_runtime": 1.7027, + "eval_samples_per_second": 227.289, + "eval_steps_per_second": 28.778, "step": 97 } ], "logging_steps": 500, - "max_steps": 144, + "max_steps": 360, "num_input_tokens_seen": 0, - "num_train_epochs": 6, + "num_train_epochs": 15, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { diff --git a/checkpoint-97/training_args.bin b/checkpoint-97/training_args.bin index 2523e8b7f8fecd9f3cea42f185f2da0360caee05..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b 100644 --- a/checkpoint-97/training_args.bin +++ b/checkpoint-97/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f758b94689e09a814f1948cbbce56e483b2b53509b111e749d1961a9e2d3ad45 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a size 5240 diff --git a/model.safetensors b/model.safetensors index 2cc85ea190641bef8b98152f2da3ca9bee426a80..0041827f52aa85a36b9347f03f59e15ddaf1e4d8 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68a1c8e7483216c3afd74e7c8f3bf326f08720513511c39fcdf37be7ae3a0552 +oid sha256:2c3c3acfb39a1584bdad8c4452c6a99cc55d03b67d4754ef55bdc31be94eb5b7 size 94765560 diff --git a/runs/Sep08_19-46-50_ubumarcos/events.out.tfevents.1725817611.ubumarcos b/runs/Sep08_19-46-50_ubumarcos/events.out.tfevents.1725817611.ubumarcos new file mode 100644 index 0000000000000000000000000000000000000000..2f94692f977058e2f969a0a403e270423ce4bf29 --- /dev/null +++ b/runs/Sep08_19-46-50_ubumarcos/events.out.tfevents.1725817611.ubumarcos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b100256c5b6e543211a4ab1927ecae4dd1fc13e9bc42036f77348778542cb5 +size 6398 diff --git a/runs/Sep08_19-47-51_ubumarcos/events.out.tfevents.1725817672.ubumarcos b/runs/Sep08_19-47-51_ubumarcos/events.out.tfevents.1725817672.ubumarcos new file mode 100644 index 0000000000000000000000000000000000000000..9d4d2b064c50959add958d4a960c4fb1f84ffc40 --- /dev/null +++ b/runs/Sep08_19-47-51_ubumarcos/events.out.tfevents.1725817672.ubumarcos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94301ffce219b59fac872068b7e35f510c4c39539613e42e478bf4d8d2538b8a +size 12379 diff --git a/training_args.bin b/training_args.bin index 1c687097901193e009172e38809112a91f02e766..325f766b25f78ee4d9ab23a2afd6556cddd3dd1b 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c712b343688a0deff84bdfb1bfec65a5350ebb13af4be1817584730c46e10303 +oid sha256:347144964cf2be899f8dce858b11b47f463178200e095ec0ecada560c69f675a size 5240