diff --git a/model.safetensors b/model.safetensors
index 752491f7e9a90968c77bc08cc825b1e7078af1c1..72f666f019d40bfdb652cff79194afd25ff5427b 100644
--- a/model.safetensors
+++ b/model.safetensors
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:1a4e3a61c5a6819d9bd9ba23fcdad4d09bf568b77df4a37cbcd3bed482949b91
+oid sha256:4f8ec01152500593829ff0f0c7a3be8665dc76acbde1ff542cbd5ec4be62d9ca
size 470641664
diff --git a/run-0/checkpoint-1263/config.json b/run-0/checkpoint-1263/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-0/checkpoint-1263/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-0/checkpoint-1263/model.safetensors b/run-0/checkpoint-1263/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d5958d53d662d08b64bf835d472da3678c565c68
--- /dev/null
+++ b/run-0/checkpoint-1263/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4987a425ece4f97ebe2a570e48267111ee2e5f8555f34b22192ce05ee5b28475
+size 470641664
diff --git a/run-0/checkpoint-1263/optimizer.pt b/run-0/checkpoint-1263/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1306b8c7fedcb0bdd29d180ac03fad4f63ac4f5
--- /dev/null
+++ b/run-0/checkpoint-1263/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c259aa99cb87a38d42083d7f39a4d829ba52d2cfe4b7e86e6872f24b073cc6a
+size 941404410
diff --git a/run-0/checkpoint-1263/rng_state.pth b/run-0/checkpoint-1263/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..4299e86d9655e2ea1f08c1a17e1d2b37ffcfa5b7
--- /dev/null
+++ b/run-0/checkpoint-1263/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3280fcfac134eaf29462f4ce7a21396d7df8855c1378766cbb68faab277b0305
+size 14308
diff --git a/run-0/checkpoint-1263/scheduler.pt b/run-0/checkpoint-1263/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6403639afd30e2d7c1d826b4fe73e05cf72034d1
--- /dev/null
+++ b/run-0/checkpoint-1263/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:221737016b1b6dc7abbc95f0bd7e97af0f963544cad8ce359faa9e1ac3d0b1aa
+size 1064
diff --git a/run-0/checkpoint-1263/sentencepiece.bpe.model b/run-0/checkpoint-1263/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-0/checkpoint-1263/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-0/checkpoint-1263/special_tokens_map.json b/run-0/checkpoint-1263/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-0/checkpoint-1263/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-0/checkpoint-1263/tokenizer.json b/run-0/checkpoint-1263/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-0/checkpoint-1263/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-0/checkpoint-1263/tokenizer_config.json b/run-0/checkpoint-1263/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-0/checkpoint-1263/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-0/checkpoint-1263/trainer_state.json b/run-0/checkpoint-1263/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..73218ee90a945a8561e3dbd3f10dafc5bd4f390c
--- /dev/null
+++ b/run-0/checkpoint-1263/trainer_state.json
@@ -0,0 +1,67 @@
+{
+ "best_metric": 0.8337155963302753,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-0/checkpoint-1263",
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 1263,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8119266055045872,
+ "eval_loss": 0.4295983910560608,
+ "eval_runtime": 1.0076,
+ "eval_samples_per_second": 865.424,
+ "eval_steps_per_second": 54.585,
+ "step": 421
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 8.123950004577637,
+ "learning_rate": 1.3357804670815412e-05,
+ "loss": 0.4991,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.823394495412844,
+ "eval_loss": 0.39807233214378357,
+ "eval_runtime": 1.0042,
+ "eval_samples_per_second": 868.388,
+ "eval_steps_per_second": 54.772,
+ "step": 842
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 5.427116870880127,
+ "learning_rate": 9.196494804517776e-06,
+ "loss": 0.3654,
+ "step": 1000
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8337155963302753,
+ "eval_loss": 0.40886953473091125,
+ "eval_runtime": 0.9135,
+ "eval_samples_per_second": 954.617,
+ "eval_steps_per_second": 60.211,
+ "step": 1263
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2105,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 85626186396660.0,
+ "train_batch_size": 16,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.751911453711305e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 16,
+ "seed": 24
+ }
+}
diff --git a/run-0/checkpoint-1263/training_args.bin b/run-0/checkpoint-1263/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ddb41ce8abf80a6a2ba39fd74cf0bd8a80a36c19
--- /dev/null
+++ b/run-0/checkpoint-1263/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce3aa54e4b5eb040628e4ef793b06ccd6e1dd48fa8ded6edc38a1248d8d9cd9f
+size 4984
diff --git a/run-0/checkpoint-1684/config.json b/run-0/checkpoint-1684/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-0/checkpoint-1684/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-0/checkpoint-1684/model.safetensors b/run-0/checkpoint-1684/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bfac166aec88da8ea327717f45469368e75a1884
--- /dev/null
+++ b/run-0/checkpoint-1684/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0995598b12d9b4e71a35fc146a0ccb4cca76512ee74492ae8bc6a414279e4315
+size 470641664
diff --git a/run-0/checkpoint-1684/optimizer.pt b/run-0/checkpoint-1684/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a6c297400a2f0dee8fa75844194cb5f14c1af69
--- /dev/null
+++ b/run-0/checkpoint-1684/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7a6770a177be40542b9ec042db1c4a9c45fc29c4547822c8291a2e114ad7df9
+size 941404410
diff --git a/run-0/checkpoint-1684/rng_state.pth b/run-0/checkpoint-1684/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..c0782121e1a8593bf9d18d6eaaf237d60efe5c53
--- /dev/null
+++ b/run-0/checkpoint-1684/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a97183ff237de4ad9b87e4c307aae93f175896bded4b871fa77c0ae10a367ce
+size 14308
diff --git a/run-0/checkpoint-1684/scheduler.pt b/run-0/checkpoint-1684/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0c5e573a266bd1feb0c52b3014bd6d2d46cfd7a
--- /dev/null
+++ b/run-0/checkpoint-1684/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a8840b82fde56318f170d55f6783322ff533f0a993f62f256e7c6300fef09ea
+size 1064
diff --git a/run-0/checkpoint-1684/sentencepiece.bpe.model b/run-0/checkpoint-1684/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-0/checkpoint-1684/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-0/checkpoint-1684/special_tokens_map.json b/run-0/checkpoint-1684/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-0/checkpoint-1684/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-0/checkpoint-1684/tokenizer.json b/run-0/checkpoint-1684/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-0/checkpoint-1684/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-0/checkpoint-1684/tokenizer_config.json b/run-0/checkpoint-1684/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-0/checkpoint-1684/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-0/checkpoint-1684/trainer_state.json b/run-0/checkpoint-1684/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d3c087c4e5e7febb7d20f31e483ee83a90888c04
--- /dev/null
+++ b/run-0/checkpoint-1684/trainer_state.json
@@ -0,0 +1,83 @@
+{
+ "best_metric": 0.8405963302752294,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-0/checkpoint-1684",
+ "epoch": 4.0,
+ "eval_steps": 500,
+ "global_step": 1684,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8119266055045872,
+ "eval_loss": 0.4295983910560608,
+ "eval_runtime": 1.0076,
+ "eval_samples_per_second": 865.424,
+ "eval_steps_per_second": 54.585,
+ "step": 421
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 8.123950004577637,
+ "learning_rate": 1.3357804670815412e-05,
+ "loss": 0.4991,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.823394495412844,
+ "eval_loss": 0.39807233214378357,
+ "eval_runtime": 1.0042,
+ "eval_samples_per_second": 868.388,
+ "eval_steps_per_second": 54.772,
+ "step": 842
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 5.427116870880127,
+ "learning_rate": 9.196494804517776e-06,
+ "loss": 0.3654,
+ "step": 1000
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8337155963302753,
+ "eval_loss": 0.40886953473091125,
+ "eval_runtime": 0.9135,
+ "eval_samples_per_second": 954.617,
+ "eval_steps_per_second": 60.211,
+ "step": 1263
+ },
+ {
+ "epoch": 3.56,
+ "grad_norm": 1.0015597343444824,
+ "learning_rate": 5.03518493822014e-06,
+ "loss": 0.2923,
+ "step": 1500
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.8405963302752294,
+ "eval_loss": 0.43070515990257263,
+ "eval_runtime": 0.931,
+ "eval_samples_per_second": 936.614,
+ "eval_steps_per_second": 59.075,
+ "step": 1684
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2105,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 128448478618824.0,
+ "train_batch_size": 16,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.751911453711305e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 16,
+ "seed": 24
+ }
+}
diff --git a/run-0/checkpoint-1684/training_args.bin b/run-0/checkpoint-1684/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ddb41ce8abf80a6a2ba39fd74cf0bd8a80a36c19
--- /dev/null
+++ b/run-0/checkpoint-1684/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce3aa54e4b5eb040628e4ef793b06ccd6e1dd48fa8ded6edc38a1248d8d9cd9f
+size 4984
diff --git a/run-0/checkpoint-2105/config.json b/run-0/checkpoint-2105/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-0/checkpoint-2105/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-0/checkpoint-2105/model.safetensors b/run-0/checkpoint-2105/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bca9ef1ec380003373d286c9bc0e9cd32f565f7f
--- /dev/null
+++ b/run-0/checkpoint-2105/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e618eba8adb30ccfca463c545bc511bd3bace5da4e44ed575b8ee43d53af2136
+size 470641664
diff --git a/run-0/checkpoint-2105/optimizer.pt b/run-0/checkpoint-2105/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8aba38e4d1d0769c6029694131382a4f6b48f8ee
--- /dev/null
+++ b/run-0/checkpoint-2105/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36d6536d428f2bef3d905316338197e405c6f15425a772986a454d02d396dc35
+size 941404410
diff --git a/run-0/checkpoint-2105/rng_state.pth b/run-0/checkpoint-2105/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..93c2718eeac51ce4bf9fc861d29ef8cd76fead90
--- /dev/null
+++ b/run-0/checkpoint-2105/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7ca3d173a2b65c0f89032545475725e910966bcf5e20d10b6d50bb64da7be7b
+size 14308
diff --git a/run-0/checkpoint-2105/scheduler.pt b/run-0/checkpoint-2105/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3d321eb73fcec51da03808d10a4de50f4925b8f
--- /dev/null
+++ b/run-0/checkpoint-2105/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94f58125482936324bdc3202e3664981e3b85dd6760eeb8e339fd6a5450bc072
+size 1064
diff --git a/run-0/checkpoint-2105/sentencepiece.bpe.model b/run-0/checkpoint-2105/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-0/checkpoint-2105/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-0/checkpoint-2105/special_tokens_map.json b/run-0/checkpoint-2105/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-0/checkpoint-2105/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-0/checkpoint-2105/tokenizer.json b/run-0/checkpoint-2105/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-0/checkpoint-2105/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-0/checkpoint-2105/tokenizer_config.json b/run-0/checkpoint-2105/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-0/checkpoint-2105/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-0/checkpoint-2105/trainer_state.json b/run-0/checkpoint-2105/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..b2537e2d7fb2b2586c4495830d2f908d213b9163
--- /dev/null
+++ b/run-0/checkpoint-2105/trainer_state.json
@@ -0,0 +1,99 @@
+{
+ "best_metric": 0.8405963302752294,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-0/checkpoint-1684",
+ "epoch": 5.0,
+ "eval_steps": 500,
+ "global_step": 2105,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8119266055045872,
+ "eval_loss": 0.4295983910560608,
+ "eval_runtime": 1.0076,
+ "eval_samples_per_second": 865.424,
+ "eval_steps_per_second": 54.585,
+ "step": 421
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 8.123950004577637,
+ "learning_rate": 1.3357804670815412e-05,
+ "loss": 0.4991,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.823394495412844,
+ "eval_loss": 0.39807233214378357,
+ "eval_runtime": 1.0042,
+ "eval_samples_per_second": 868.388,
+ "eval_steps_per_second": 54.772,
+ "step": 842
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 5.427116870880127,
+ "learning_rate": 9.196494804517776e-06,
+ "loss": 0.3654,
+ "step": 1000
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8337155963302753,
+ "eval_loss": 0.40886953473091125,
+ "eval_runtime": 0.9135,
+ "eval_samples_per_second": 954.617,
+ "eval_steps_per_second": 60.211,
+ "step": 1263
+ },
+ {
+ "epoch": 3.56,
+ "grad_norm": 1.0015597343444824,
+ "learning_rate": 5.03518493822014e-06,
+ "loss": 0.2923,
+ "step": 1500
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.8405963302752294,
+ "eval_loss": 0.43070515990257263,
+ "eval_runtime": 0.931,
+ "eval_samples_per_second": 936.614,
+ "eval_steps_per_second": 59.075,
+ "step": 1684
+ },
+ {
+ "epoch": 4.75,
+ "grad_norm": 18.156158447265625,
+ "learning_rate": 8.738750719225037e-07,
+ "loss": 0.2538,
+ "step": 2000
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.838302752293578,
+ "eval_loss": 0.4588472545146942,
+ "eval_runtime": 1.0148,
+ "eval_samples_per_second": 859.266,
+ "eval_steps_per_second": 54.197,
+ "step": 2105
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2105,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 170928927395856.0,
+ "train_batch_size": 16,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.751911453711305e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 16,
+ "seed": 24
+ }
+}
diff --git a/run-0/checkpoint-2105/training_args.bin b/run-0/checkpoint-2105/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ddb41ce8abf80a6a2ba39fd74cf0bd8a80a36c19
--- /dev/null
+++ b/run-0/checkpoint-2105/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce3aa54e4b5eb040628e4ef793b06ccd6e1dd48fa8ded6edc38a1248d8d9cd9f
+size 4984
diff --git a/run-0/checkpoint-842/config.json b/run-0/checkpoint-842/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-0/checkpoint-842/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-0/checkpoint-842/model.safetensors b/run-0/checkpoint-842/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fc0da6d328f52c639aa4d4729fba58b9a0ade785
--- /dev/null
+++ b/run-0/checkpoint-842/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4784ca084b0a43fa198b34ad925d28939eba9788418a3ddf34f133c630a2cbcb
+size 470641664
diff --git a/run-0/checkpoint-842/optimizer.pt b/run-0/checkpoint-842/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1221f1c3bdf68d06e7f4b3abf9edc82f6a07728
--- /dev/null
+++ b/run-0/checkpoint-842/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e11d804e4cc145d31d9061e9c8182f4c440fb41ba9e2d9cf8c4e608b320c7233
+size 941404410
diff --git a/run-0/checkpoint-842/rng_state.pth b/run-0/checkpoint-842/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d67c07da2d655db9f0f31ea03c707bd5a7c10a6b
--- /dev/null
+++ b/run-0/checkpoint-842/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec272268295ea8bc34374cc8be110ac45ec65a2e0880d4c3c9f585375c05a34d
+size 14308
diff --git a/run-0/checkpoint-842/scheduler.pt b/run-0/checkpoint-842/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..259e9f96663b4778396583834e98be7b1827a089
--- /dev/null
+++ b/run-0/checkpoint-842/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e8c99c2d52c113ff19a3c5d4eeb160106bd8dbbdb7f488b071fc32090c8a214
+size 1064
diff --git a/run-0/checkpoint-842/sentencepiece.bpe.model b/run-0/checkpoint-842/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-0/checkpoint-842/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-0/checkpoint-842/special_tokens_map.json b/run-0/checkpoint-842/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-0/checkpoint-842/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-0/checkpoint-842/tokenizer.json b/run-0/checkpoint-842/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-0/checkpoint-842/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-0/checkpoint-842/tokenizer_config.json b/run-0/checkpoint-842/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-0/checkpoint-842/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-0/checkpoint-842/trainer_state.json b/run-0/checkpoint-842/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..482d21d860d5913c96a46c867569f2400fa1f33f
--- /dev/null
+++ b/run-0/checkpoint-842/trainer_state.json
@@ -0,0 +1,51 @@
+{
+ "best_metric": 0.823394495412844,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-0/checkpoint-842",
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 842,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8119266055045872,
+ "eval_loss": 0.4295983910560608,
+ "eval_runtime": 1.0076,
+ "eval_samples_per_second": 865.424,
+ "eval_steps_per_second": 54.585,
+ "step": 421
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 8.123950004577637,
+ "learning_rate": 1.3357804670815412e-05,
+ "loss": 0.4991,
+ "step": 500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.823394495412844,
+ "eval_loss": 0.39807233214378357,
+ "eval_runtime": 1.0042,
+ "eval_samples_per_second": 868.388,
+ "eval_steps_per_second": 54.772,
+ "step": 842
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2105,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 42505408366176.0,
+ "train_batch_size": 16,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.751911453711305e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 16,
+ "seed": 24
+ }
+}
diff --git a/run-0/checkpoint-842/training_args.bin b/run-0/checkpoint-842/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ddb41ce8abf80a6a2ba39fd74cf0bd8a80a36c19
--- /dev/null
+++ b/run-0/checkpoint-842/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce3aa54e4b5eb040628e4ef793b06ccd6e1dd48fa8ded6edc38a1248d8d9cd9f
+size 4984
diff --git a/run-1/checkpoint-1684/config.json b/run-1/checkpoint-1684/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-1/checkpoint-1684/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-1/checkpoint-1684/model.safetensors b/run-1/checkpoint-1684/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5e12ad77e9d0b0a5052ce16b9d2e344e0481cc3e
--- /dev/null
+++ b/run-1/checkpoint-1684/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16a921a72cdabca7c39ac9b4fa0be0cc871fbe54bb5eeb97a5ae1bbab6efd3df
+size 470641664
diff --git a/run-1/checkpoint-1684/optimizer.pt b/run-1/checkpoint-1684/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f909fd476de89c22cda1cab2aaf5ac8a63e7ca0
--- /dev/null
+++ b/run-1/checkpoint-1684/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80aaa8d2c4d28a382afec04f4a59dd03df0e8c190d750e8c9adfe90a07f79acd
+size 941404410
diff --git a/run-1/checkpoint-1684/rng_state.pth b/run-1/checkpoint-1684/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7ccfbd536f55f854017981a6aaad4ecc7099a384
--- /dev/null
+++ b/run-1/checkpoint-1684/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6134807491dee851aa7553fa633268a2a24961a412137c1864392e5e48f836f
+size 14244
diff --git a/run-1/checkpoint-1684/scheduler.pt b/run-1/checkpoint-1684/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1fcb515e64d374613f04f734ae30e3a28e3cba7d
--- /dev/null
+++ b/run-1/checkpoint-1684/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96f297e6c0dfd39d248d448aac313b29c2ee18621b19510394ed51bc68abf1a6
+size 1064
diff --git a/run-1/checkpoint-1684/sentencepiece.bpe.model b/run-1/checkpoint-1684/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-1/checkpoint-1684/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-1/checkpoint-1684/special_tokens_map.json b/run-1/checkpoint-1684/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-1/checkpoint-1684/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-1684/tokenizer.json b/run-1/checkpoint-1684/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-1/checkpoint-1684/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-1/checkpoint-1684/tokenizer_config.json b/run-1/checkpoint-1684/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-1/checkpoint-1684/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-1684/trainer_state.json b/run-1/checkpoint-1684/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..dbf8720ee8ea8c1462209c6a367340ff8380bd44
--- /dev/null
+++ b/run-1/checkpoint-1684/trainer_state.json
@@ -0,0 +1,65 @@
+{
+ "best_metric": 0.8256880733944955,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-1/checkpoint-1684",
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 1684,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.59,
+ "grad_norm": 7.641239166259766,
+ "learning_rate": 1.4914273614162126e-05,
+ "loss": 0.5407,
+ "step": 500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8084862385321101,
+ "eval_loss": 0.4513384699821472,
+ "eval_runtime": 0.9147,
+ "eval_samples_per_second": 953.342,
+ "eval_steps_per_second": 60.131,
+ "step": 842
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 20.01938247680664,
+ "learning_rate": 1.2904263693116017e-05,
+ "loss": 0.4323,
+ "step": 1000
+ },
+ {
+ "epoch": 1.78,
+ "grad_norm": 2.568437337875366,
+ "learning_rate": 1.089425377206991e-05,
+ "loss": 0.3959,
+ "step": 1500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8256880733944955,
+ "eval_loss": 0.45341619849205017,
+ "eval_runtime": 1.2452,
+ "eval_samples_per_second": 700.289,
+ "eval_steps_per_second": 44.17,
+ "step": 1684
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 4210,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 54209268494220.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.6924283535208234e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 8,
+ "seed": 25
+ }
+}
diff --git a/run-1/checkpoint-1684/training_args.bin b/run-1/checkpoint-1684/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..685be838abf51c8f43cff5969aae716fdec2b8e3
--- /dev/null
+++ b/run-1/checkpoint-1684/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e429b7a648ede63895839361e40497178ac098dbcde1deb4b4550ebe214f14
+size 4984
diff --git a/run-1/checkpoint-2526/config.json b/run-1/checkpoint-2526/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-1/checkpoint-2526/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-1/checkpoint-2526/model.safetensors b/run-1/checkpoint-2526/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..435f25a028409bfe92206ecae0cbd04c6990c9da
--- /dev/null
+++ b/run-1/checkpoint-2526/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba481d96afc8deb6f4b3a87a79d03c569496d8ddbc3e689c1059a2f6e1fbf534
+size 470641664
diff --git a/run-1/checkpoint-2526/optimizer.pt b/run-1/checkpoint-2526/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ae154dd56083a374c077f9ee3cffcceabb8b7d31
--- /dev/null
+++ b/run-1/checkpoint-2526/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d56eb960e6de6ed15b46a386715c13febecd1c877fd11caaf7939a0c9e0a9c66
+size 941404410
diff --git a/run-1/checkpoint-2526/rng_state.pth b/run-1/checkpoint-2526/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0644e3b7b7420d0bb44d85f014749ba921340087
--- /dev/null
+++ b/run-1/checkpoint-2526/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:226cbb3f3dff96465ce2ad4f5d112997aceb83a8e8a823197e9ac0c43dc89442
+size 14244
diff --git a/run-1/checkpoint-2526/scheduler.pt b/run-1/checkpoint-2526/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..36a5c378e21e7ae47984317e8bddc7bee475ef23
--- /dev/null
+++ b/run-1/checkpoint-2526/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fb94542c69e2c4c5ddd25ba27fcc8508f02b848a2755e057a4cc4689d99c1a6
+size 1064
diff --git a/run-1/checkpoint-2526/sentencepiece.bpe.model b/run-1/checkpoint-2526/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-1/checkpoint-2526/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-1/checkpoint-2526/special_tokens_map.json b/run-1/checkpoint-2526/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-1/checkpoint-2526/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-2526/tokenizer.json b/run-1/checkpoint-2526/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-1/checkpoint-2526/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-1/checkpoint-2526/tokenizer_config.json b/run-1/checkpoint-2526/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-1/checkpoint-2526/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-2526/trainer_state.json b/run-1/checkpoint-2526/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3de320d1a05d075d668afc523cd193b0f9327b1b
--- /dev/null
+++ b/run-1/checkpoint-2526/trainer_state.json
@@ -0,0 +1,88 @@
+{
+ "best_metric": 0.8348623853211009,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-1/checkpoint-2526",
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 2526,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.59,
+ "grad_norm": 7.641239166259766,
+ "learning_rate": 1.4914273614162126e-05,
+ "loss": 0.5407,
+ "step": 500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8084862385321101,
+ "eval_loss": 0.4513384699821472,
+ "eval_runtime": 0.9147,
+ "eval_samples_per_second": 953.342,
+ "eval_steps_per_second": 60.131,
+ "step": 842
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 20.01938247680664,
+ "learning_rate": 1.2904263693116017e-05,
+ "loss": 0.4323,
+ "step": 1000
+ },
+ {
+ "epoch": 1.78,
+ "grad_norm": 2.568437337875366,
+ "learning_rate": 1.089425377206991e-05,
+ "loss": 0.3959,
+ "step": 1500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8256880733944955,
+ "eval_loss": 0.45341619849205017,
+ "eval_runtime": 1.2452,
+ "eval_samples_per_second": 700.289,
+ "eval_steps_per_second": 44.17,
+ "step": 1684
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 0.6975702047348022,
+ "learning_rate": 8.884243851023799e-06,
+ "loss": 0.361,
+ "step": 2000
+ },
+ {
+ "epoch": 2.97,
+ "grad_norm": 0.5081818699836731,
+ "learning_rate": 6.874233929977691e-06,
+ "loss": 0.3226,
+ "step": 2500
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8348623853211009,
+ "eval_loss": 0.5020601153373718,
+ "eval_runtime": 0.9128,
+ "eval_samples_per_second": 955.316,
+ "eval_steps_per_second": 60.255,
+ "step": 2526
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 4210,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 90087133996608.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.6924283535208234e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 8,
+ "seed": 25
+ }
+}
diff --git a/run-1/checkpoint-2526/training_args.bin b/run-1/checkpoint-2526/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..685be838abf51c8f43cff5969aae716fdec2b8e3
--- /dev/null
+++ b/run-1/checkpoint-2526/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e429b7a648ede63895839361e40497178ac098dbcde1deb4b4550ebe214f14
+size 4984
diff --git a/run-1/checkpoint-3368/config.json b/run-1/checkpoint-3368/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-1/checkpoint-3368/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-1/checkpoint-3368/model.safetensors b/run-1/checkpoint-3368/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..89423c8dc14c86a20b9db4ab9cc73094044d3a55
--- /dev/null
+++ b/run-1/checkpoint-3368/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44c4af8f6296f21368e43808b3ee8fd6020630b11e566cfa9c5226c1e3b56f0a
+size 470641664
diff --git a/run-1/checkpoint-3368/optimizer.pt b/run-1/checkpoint-3368/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b726af6ca9e8a92155cf1b140e4715c5fba1a2c
--- /dev/null
+++ b/run-1/checkpoint-3368/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01e5cc607fb67cd58c03bbcf128949e355f1c8e640d74b212b4323a9dc0ca0b8
+size 941404410
diff --git a/run-1/checkpoint-3368/rng_state.pth b/run-1/checkpoint-3368/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..240470fd4a11332c7015f961225dc14319688560
--- /dev/null
+++ b/run-1/checkpoint-3368/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:362716edafd1017e82a643f4a2702ac5ff7d428680fd556a91b1776c26dcb4e0
+size 14244
diff --git a/run-1/checkpoint-3368/scheduler.pt b/run-1/checkpoint-3368/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ddbebac7c424650a59993a699d1581bc76f1abf4
--- /dev/null
+++ b/run-1/checkpoint-3368/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01c12a4665d1be79fd3eef194c1078279d23fef1494e245d29cfea725bb38082
+size 1064
diff --git a/run-1/checkpoint-3368/sentencepiece.bpe.model b/run-1/checkpoint-3368/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-1/checkpoint-3368/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-1/checkpoint-3368/special_tokens_map.json b/run-1/checkpoint-3368/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-1/checkpoint-3368/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-3368/tokenizer.json b/run-1/checkpoint-3368/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-1/checkpoint-3368/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-1/checkpoint-3368/tokenizer_config.json b/run-1/checkpoint-3368/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-1/checkpoint-3368/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-3368/trainer_state.json b/run-1/checkpoint-3368/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..582f0b23d34f30af4f9227abc5e9170be290b097
--- /dev/null
+++ b/run-1/checkpoint-3368/trainer_state.json
@@ -0,0 +1,104 @@
+{
+ "best_metric": 0.8394495412844036,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-1/checkpoint-3368",
+ "epoch": 4.0,
+ "eval_steps": 500,
+ "global_step": 3368,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.59,
+ "grad_norm": 7.641239166259766,
+ "learning_rate": 1.4914273614162126e-05,
+ "loss": 0.5407,
+ "step": 500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8084862385321101,
+ "eval_loss": 0.4513384699821472,
+ "eval_runtime": 0.9147,
+ "eval_samples_per_second": 953.342,
+ "eval_steps_per_second": 60.131,
+ "step": 842
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 20.01938247680664,
+ "learning_rate": 1.2904263693116017e-05,
+ "loss": 0.4323,
+ "step": 1000
+ },
+ {
+ "epoch": 1.78,
+ "grad_norm": 2.568437337875366,
+ "learning_rate": 1.089425377206991e-05,
+ "loss": 0.3959,
+ "step": 1500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8256880733944955,
+ "eval_loss": 0.45341619849205017,
+ "eval_runtime": 1.2452,
+ "eval_samples_per_second": 700.289,
+ "eval_steps_per_second": 44.17,
+ "step": 1684
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 0.6975702047348022,
+ "learning_rate": 8.884243851023799e-06,
+ "loss": 0.361,
+ "step": 2000
+ },
+ {
+ "epoch": 2.97,
+ "grad_norm": 0.5081818699836731,
+ "learning_rate": 6.874233929977691e-06,
+ "loss": 0.3226,
+ "step": 2500
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8348623853211009,
+ "eval_loss": 0.5020601153373718,
+ "eval_runtime": 0.9128,
+ "eval_samples_per_second": 955.316,
+ "eval_steps_per_second": 60.255,
+ "step": 2526
+ },
+ {
+ "epoch": 3.56,
+ "grad_norm": 39.4764518737793,
+ "learning_rate": 4.864224008931583e-06,
+ "loss": 0.2707,
+ "step": 3000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.8394495412844036,
+ "eval_loss": 0.5194000005722046,
+ "eval_runtime": 1.1521,
+ "eval_samples_per_second": 756.852,
+ "eval_steps_per_second": 47.737,
+ "step": 3368
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 4210,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 107625105100548.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.6924283535208234e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 8,
+ "seed": 25
+ }
+}
diff --git a/run-1/checkpoint-3368/training_args.bin b/run-1/checkpoint-3368/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..685be838abf51c8f43cff5969aae716fdec2b8e3
--- /dev/null
+++ b/run-1/checkpoint-3368/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e429b7a648ede63895839361e40497178ac098dbcde1deb4b4550ebe214f14
+size 4984
diff --git a/run-1/checkpoint-4210/config.json b/run-1/checkpoint-4210/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-1/checkpoint-4210/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-1/checkpoint-4210/model.safetensors b/run-1/checkpoint-4210/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c266208a6bf882c0d142b449f40abe89ed13359e
--- /dev/null
+++ b/run-1/checkpoint-4210/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a2c3ffa1fb4c133714679a2888861980e0e7619d171fc8e8a7fd66b56a7e4d4
+size 470641664
diff --git a/run-1/checkpoint-4210/optimizer.pt b/run-1/checkpoint-4210/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a401843a2c48fb7266de016488f46f30a03eebbf
--- /dev/null
+++ b/run-1/checkpoint-4210/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:326221b11f18b2391a10f911e1ffe1dd3c28be8c87a63258442bed91d7dfd666
+size 941404410
diff --git a/run-1/checkpoint-4210/rng_state.pth b/run-1/checkpoint-4210/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..2dbc6744e11edc91f519d9d5e7eec3608d644668
--- /dev/null
+++ b/run-1/checkpoint-4210/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca5b22afa3d2dece356c6dbe7661ab29ed6fafb0728506fb22217110049a1510
+size 14244
diff --git a/run-1/checkpoint-4210/scheduler.pt b/run-1/checkpoint-4210/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..12356957bd5095e41015c06ad7d81b4eb5cf3b99
--- /dev/null
+++ b/run-1/checkpoint-4210/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b91d8895d67eff2efa1ac4d06ddea534b561a35b8c48e2b8a0b1d9dd1bdba649
+size 1064
diff --git a/run-1/checkpoint-4210/sentencepiece.bpe.model b/run-1/checkpoint-4210/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-1/checkpoint-4210/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-1/checkpoint-4210/special_tokens_map.json b/run-1/checkpoint-4210/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-1/checkpoint-4210/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-4210/tokenizer.json b/run-1/checkpoint-4210/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-1/checkpoint-4210/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-1/checkpoint-4210/tokenizer_config.json b/run-1/checkpoint-4210/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-1/checkpoint-4210/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-4210/trainer_state.json b/run-1/checkpoint-4210/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f25c57826a758b56d7a67741759ffbc640e76a11
--- /dev/null
+++ b/run-1/checkpoint-4210/trainer_state.json
@@ -0,0 +1,127 @@
+{
+ "best_metric": 0.8440366972477065,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-1/checkpoint-4210",
+ "epoch": 5.0,
+ "eval_steps": 500,
+ "global_step": 4210,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.59,
+ "grad_norm": 7.641239166259766,
+ "learning_rate": 1.4914273614162126e-05,
+ "loss": 0.5407,
+ "step": 500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8084862385321101,
+ "eval_loss": 0.4513384699821472,
+ "eval_runtime": 0.9147,
+ "eval_samples_per_second": 953.342,
+ "eval_steps_per_second": 60.131,
+ "step": 842
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 20.01938247680664,
+ "learning_rate": 1.2904263693116017e-05,
+ "loss": 0.4323,
+ "step": 1000
+ },
+ {
+ "epoch": 1.78,
+ "grad_norm": 2.568437337875366,
+ "learning_rate": 1.089425377206991e-05,
+ "loss": 0.3959,
+ "step": 1500
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8256880733944955,
+ "eval_loss": 0.45341619849205017,
+ "eval_runtime": 1.2452,
+ "eval_samples_per_second": 700.289,
+ "eval_steps_per_second": 44.17,
+ "step": 1684
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 0.6975702047348022,
+ "learning_rate": 8.884243851023799e-06,
+ "loss": 0.361,
+ "step": 2000
+ },
+ {
+ "epoch": 2.97,
+ "grad_norm": 0.5081818699836731,
+ "learning_rate": 6.874233929977691e-06,
+ "loss": 0.3226,
+ "step": 2500
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8348623853211009,
+ "eval_loss": 0.5020601153373718,
+ "eval_runtime": 0.9128,
+ "eval_samples_per_second": 955.316,
+ "eval_steps_per_second": 60.255,
+ "step": 2526
+ },
+ {
+ "epoch": 3.56,
+ "grad_norm": 39.4764518737793,
+ "learning_rate": 4.864224008931583e-06,
+ "loss": 0.2707,
+ "step": 3000
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.8394495412844036,
+ "eval_loss": 0.5194000005722046,
+ "eval_runtime": 1.1521,
+ "eval_samples_per_second": 756.852,
+ "eval_steps_per_second": 47.737,
+ "step": 3368
+ },
+ {
+ "epoch": 4.16,
+ "grad_norm": 0.2999955117702484,
+ "learning_rate": 2.8542140878854744e-06,
+ "loss": 0.2608,
+ "step": 3500
+ },
+ {
+ "epoch": 4.75,
+ "grad_norm": 37.41545867919922,
+ "learning_rate": 8.442041668393656e-07,
+ "loss": 0.2394,
+ "step": 4000
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.8440366972477065,
+ "eval_loss": 0.5257198810577393,
+ "eval_runtime": 0.9113,
+ "eval_samples_per_second": 956.866,
+ "eval_steps_per_second": 60.353,
+ "step": 4210
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 4210,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 143595990102684.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.6924283535208234e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 8,
+ "seed": 25
+ }
+}
diff --git a/run-1/checkpoint-4210/training_args.bin b/run-1/checkpoint-4210/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..685be838abf51c8f43cff5969aae716fdec2b8e3
--- /dev/null
+++ b/run-1/checkpoint-4210/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e429b7a648ede63895839361e40497178ac098dbcde1deb4b4550ebe214f14
+size 4984
diff --git a/run-1/checkpoint-842/config.json b/run-1/checkpoint-842/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-1/checkpoint-842/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-1/checkpoint-842/model.safetensors b/run-1/checkpoint-842/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..740721d782e9294d57b7a303203dcc39cbe4429a
--- /dev/null
+++ b/run-1/checkpoint-842/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a3fc766ab0093cf34f31cdc674b7f31d8e666c9892cc02763446c4f4b0dfed5
+size 470641664
diff --git a/run-1/checkpoint-842/optimizer.pt b/run-1/checkpoint-842/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1cdd285c9538477ec4c6fa5e15f374f2bacba898
--- /dev/null
+++ b/run-1/checkpoint-842/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa789179612602068fe03818aefa2110d5ca67152b209c7d5b81ab206a2fe9ab
+size 941404410
diff --git a/run-1/checkpoint-842/rng_state.pth b/run-1/checkpoint-842/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..963a6a4e388bb54184402850e5dfaaaa37e3042c
--- /dev/null
+++ b/run-1/checkpoint-842/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06991aee24b1bced4592e84dfdec9ac6c8b833d29b39c28e9b3d0941d485359f
+size 14244
diff --git a/run-1/checkpoint-842/scheduler.pt b/run-1/checkpoint-842/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..398c35f844624f53f286c3f395a2919b1d468bfc
--- /dev/null
+++ b/run-1/checkpoint-842/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b2a3fb94c7cd856cca2c06338856ca838494d31d2ff1791bc58fcaeba2da622
+size 1064
diff --git a/run-1/checkpoint-842/sentencepiece.bpe.model b/run-1/checkpoint-842/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-1/checkpoint-842/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-1/checkpoint-842/special_tokens_map.json b/run-1/checkpoint-842/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-1/checkpoint-842/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-842/tokenizer.json b/run-1/checkpoint-842/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-1/checkpoint-842/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-1/checkpoint-842/tokenizer_config.json b/run-1/checkpoint-842/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-1/checkpoint-842/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-1/checkpoint-842/trainer_state.json b/run-1/checkpoint-842/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..93a200ed4042a8c45b47247234c58fdeeeba8072
--- /dev/null
+++ b/run-1/checkpoint-842/trainer_state.json
@@ -0,0 +1,42 @@
+{
+ "best_metric": 0.8084862385321101,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-1/checkpoint-842",
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 842,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.59,
+ "grad_norm": 7.641239166259766,
+ "learning_rate": 1.4914273614162126e-05,
+ "loss": 0.5407,
+ "step": 500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.8084862385321101,
+ "eval_loss": 0.4513384699821472,
+ "eval_runtime": 0.9147,
+ "eval_samples_per_second": 953.342,
+ "eval_steps_per_second": 60.131,
+ "step": 842
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 4210,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 17905031453568.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.6924283535208234e-05,
+ "num_train_epochs": 5,
+ "per_device_train_batch_size": 8,
+ "seed": 25
+ }
+}
diff --git a/run-1/checkpoint-842/training_args.bin b/run-1/checkpoint-842/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..685be838abf51c8f43cff5969aae716fdec2b8e3
--- /dev/null
+++ b/run-1/checkpoint-842/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e429b7a648ede63895839361e40497178ac098dbcde1deb4b4550ebe214f14
+size 4984
diff --git a/run-2/checkpoint-106/config.json b/run-2/checkpoint-106/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-2/checkpoint-106/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-2/checkpoint-106/model.safetensors b/run-2/checkpoint-106/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..72f666f019d40bfdb652cff79194afd25ff5427b
--- /dev/null
+++ b/run-2/checkpoint-106/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f8ec01152500593829ff0f0c7a3be8665dc76acbde1ff542cbd5ec4be62d9ca
+size 470641664
diff --git a/run-2/checkpoint-106/optimizer.pt b/run-2/checkpoint-106/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7de119d6834b63f23ff9ca22ff774016d6574cbe
--- /dev/null
+++ b/run-2/checkpoint-106/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22a914226b13f37d63a186dafff8f05b02593f6415c244d98eba054ddc08a71f
+size 941404410
diff --git a/run-2/checkpoint-106/rng_state.pth b/run-2/checkpoint-106/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..b46d4a561f7aba9cd217eee33f1463771f5e172b
--- /dev/null
+++ b/run-2/checkpoint-106/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adf1412392a1b9d1a363d57b26d7a27a09660a0c8845780ee0b6df11df6605ba
+size 14244
diff --git a/run-2/checkpoint-106/scheduler.pt b/run-2/checkpoint-106/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5f6ba239f83b7ad134f0f2e1f3214b6e334f176
--- /dev/null
+++ b/run-2/checkpoint-106/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c1688fed11bff0984b7eb2a6e205640675eafe272c048f08089c79f2dee25dc
+size 1064
diff --git a/run-2/checkpoint-106/sentencepiece.bpe.model b/run-2/checkpoint-106/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-2/checkpoint-106/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-2/checkpoint-106/special_tokens_map.json b/run-2/checkpoint-106/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-2/checkpoint-106/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-2/checkpoint-106/tokenizer.json b/run-2/checkpoint-106/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-2/checkpoint-106/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-2/checkpoint-106/tokenizer_config.json b/run-2/checkpoint-106/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-2/checkpoint-106/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-2/checkpoint-106/trainer_state.json b/run-2/checkpoint-106/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..70880cff2a86ab20a02955672dc1e3be727c2776
--- /dev/null
+++ b/run-2/checkpoint-106/trainer_state.json
@@ -0,0 +1,35 @@
+{
+ "best_metric": 0.5091743119266054,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-2/checkpoint-106",
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 106,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.5091743119266054,
+ "eval_loss": 0.6923818588256836,
+ "eval_runtime": 0.9986,
+ "eval_samples_per_second": 873.219,
+ "eval_steps_per_second": 55.077,
+ "step": 106
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 106,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 500,
+ "total_flos": 0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 5.275652958749499e-06,
+ "num_train_epochs": 1,
+ "per_device_train_batch_size": 64,
+ "seed": 4
+ }
+}
diff --git a/run-2/checkpoint-106/training_args.bin b/run-2/checkpoint-106/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e1a0adaec4771afe8bcc6d092ea4cc160137616a
--- /dev/null
+++ b/run-2/checkpoint-106/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eefd5f7797a984cbeef2a22b4948efc638582cac13afcda07006d095205caaef
+size 4984
diff --git a/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709637985.0db4763e2117.1712.2 b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709637985.0db4763e2117.1712.2
index 25a6cf882681c31c86208b9b9cf074b77637b5fc..cae8193d218bbcede146d4e71ff2cd81eacab29a 100644
--- a/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709637985.0db4763e2117.1712.2
+++ b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709637985.0db4763e2117.1712.2
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:ef34316d1b56e49fbd57b310fade86634b5ff608f0b30c3903b06de13e723374
-size 5204
+oid sha256:dfacfe21219bedbe1c9b8dbbcde6c7d5288f0d7cb0f04cf07dfb99d996434be6
+size 7483
diff --git a/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709638354.0db4763e2117.1712.3 b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709638354.0db4763e2117.1712.3
new file mode 100644
index 0000000000000000000000000000000000000000..c30fbd8426df219a5780eacd45f968a775c1ba30
--- /dev/null
+++ b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709638354.0db4763e2117.1712.3
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7faae7c6e0edf1b1c4f4bffaf3d1b507b15e8b23c7d5741795aa16d240c795bd
+size 8327
diff --git a/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709638887.0db4763e2117.1712.4 b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709638887.0db4763e2117.1712.4
new file mode 100644
index 0000000000000000000000000000000000000000..5d228d470bac16be6b45ed211b5a8b566f31cae8
--- /dev/null
+++ b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709638887.0db4763e2117.1712.4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdc4463afa53afa34c95bbb8668352614490ea7c8a443cef0ef53a5d6c0da65f
+size 5334
diff --git a/training_args.bin b/training_args.bin
index ddb41ce8abf80a6a2ba39fd74cf0bd8a80a36c19..e1a0adaec4771afe8bcc6d092ea4cc160137616a 100644
--- a/training_args.bin
+++ b/training_args.bin
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:ce3aa54e4b5eb040628e4ef793b06ccd6e1dd48fa8ded6edc38a1248d8d9cd9f
+oid sha256:eefd5f7797a984cbeef2a22b4948efc638582cac13afcda07006d095205caaef
size 4984