Training in progress, step 500

Browse files

Files changed (12) hide show

README.md +8 -8
all_results.json +10 -4
eval_results.json +4 -4
model.safetensors +1 -1
preprocessor_config.json +28 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +57 -0
train_results.json +8 -0
trainer_state.json +192 -0
training_args.bin +1 -1
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -9,15 +9,15 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/shark_meow_team/huggingface/runs/fkvo1408)
 # aoi_clip
 This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
-- eval_loss: 1.2665
-- eval_runtime: 90.1455
-- eval_samples_per_second: 192.611
-- eval_steps_per_second: 48.155
 - step: 0
 ## Model description
@@ -37,9 +37,9 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 5e-05
-- train_batch_size: 4
-- eval_batch_size: 4
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear

 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/shark_meow_team/huggingface/runs/rttkkl5c)
 # aoi_clip
 This model was trained from scratch on an unknown dataset.
 It achieves the following results on the evaluation set:
+- eval_loss: 5.4099
+- eval_runtime: 95.2682
+- eval_samples_per_second: 317.031
+- eval_steps_per_second: 7.211
 - step: 0
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 40
+- eval_batch_size: 44
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear

all_results.json CHANGED Viewed

@@ -1,6 +1,12 @@
 {
-    "eval_loss": 1.266502857208252,
-    "eval_runtime": 90.1455,
-    "eval_samples_per_second": 192.611,
-    "eval_steps_per_second": 48.155
 }

 {
+    "epoch": 100.0,
+    "eval_loss": 5.409937381744385,
+    "eval_runtime": 95.2682,
+    "eval_samples_per_second": 317.031,
+    "eval_steps_per_second": 7.211,
+    "total_flos": 6.859464121840128e+17,
+    "train_loss": 0.11642176906541846,
+    "train_runtime": 15360.6557,
+    "train_samples_per_second": 113.036,
+    "train_steps_per_second": 2.832
 }

eval_results.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-    "eval_loss": 1.266502857208252,
-    "eval_runtime": 90.1455,
-    "eval_samples_per_second": 192.611,
-    "eval_steps_per_second": 48.155
 }

 {
+    "eval_loss": 5.409937381744385,
+    "eval_runtime": 95.2682,
+    "eval_samples_per_second": 317.031,
+    "eval_steps_per_second": 7.211
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e457732250e9fbed99b016346a91b5340e50dfc24253484eac48a07c5fd097e
 size 1162455388

 version https://git-lfs.github.com/spec/v1
+oid sha256:24e93685a8e88274d883bcb9c2efadee803084147834139d25d748d6bffc4fdb
 size 1162455388

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": false,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "ChineseCLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 100.0,
+    "total_flos": 6.859464121840128e+17,
+    "train_loss": 0.11642176906541846,
+    "train_runtime": 15360.6557,
+    "train_samples_per_second": 113.036,
+    "train_steps_per_second": 2.832
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,192 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 100.0,
+  "eval_steps": 4350,
+  "global_step": 43500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.00021520511654671282,
+      "learning_rate": 9.000919540229886e-06,
+      "loss": 0.48,
+      "step": 4350
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 3.651794195175171,
+      "eval_runtime": 55.7113,
+      "eval_samples_per_second": 311.66,
+      "eval_steps_per_second": 7.09,
+      "step": 4350
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 0.00017535020015202463,
+      "learning_rate": 8.001379310344829e-06,
+      "loss": 0.1193,
+      "step": 8700
+    },
+    {
+      "epoch": 20.0,
+      "eval_loss": 3.453643798828125,
+      "eval_runtime": 55.7389,
+      "eval_samples_per_second": 311.506,
+      "eval_steps_per_second": 7.087,
+      "step": 8700
+    },
+    {
+      "epoch": 30.0,
+      "grad_norm": 0.18407990038394928,
+      "learning_rate": 7.00183908045977e-06,
+      "loss": 0.0925,
+      "step": 13050
+    },
+    {
+      "epoch": 30.0,
+      "eval_loss": 3.4169740676879883,
+      "eval_runtime": 55.9578,
+      "eval_samples_per_second": 310.287,
+      "eval_steps_per_second": 7.059,
+      "step": 13050
+    },
+    {
+      "epoch": 40.0,
+      "grad_norm": 8.286705269711092e-05,
+      "learning_rate": 6.002298850574713e-06,
+      "loss": 0.0827,
+      "step": 17400
+    },
+    {
+      "epoch": 40.0,
+      "eval_loss": 3.3203234672546387,
+      "eval_runtime": 55.9217,
+      "eval_samples_per_second": 310.488,
+      "eval_steps_per_second": 7.063,
+      "step": 17400
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 0.0003959204477723688,
+      "learning_rate": 5.002758620689656e-06,
+      "loss": 0.0755,
+      "step": 21750
+    },
+    {
+      "epoch": 50.0,
+      "eval_loss": 3.196115732192993,
+      "eval_runtime": 55.4125,
+      "eval_samples_per_second": 313.341,
+      "eval_steps_per_second": 7.128,
+      "step": 21750
+    },
+    {
+      "epoch": 60.0,
+      "grad_norm": 7.149695011321455e-05,
+      "learning_rate": 4.003218390804598e-06,
+      "loss": 0.0685,
+      "step": 26100
+    },
+    {
+      "epoch": 60.0,
+      "eval_loss": 3.110957145690918,
+      "eval_runtime": 55.9729,
+      "eval_samples_per_second": 310.204,
+      "eval_steps_per_second": 7.057,
+      "step": 26100
+    },
+    {
+      "epoch": 70.0,
+      "grad_norm": 0.013322222046554089,
+      "learning_rate": 3.0036781609195404e-06,
+      "loss": 0.0664,
+      "step": 30450
+    },
+    {
+      "epoch": 70.0,
+      "eval_loss": 3.0240492820739746,
+      "eval_runtime": 56.1145,
+      "eval_samples_per_second": 309.421,
+      "eval_steps_per_second": 7.039,
+      "step": 30450
+    },
+    {
+      "epoch": 80.0,
+      "grad_norm": 2.9004069801885635e-05,
+      "learning_rate": 2.004137931034483e-06,
+      "loss": 0.0621,
+      "step": 34800
+    },
+    {
+      "epoch": 80.0,
+      "eval_loss": 2.9804065227508545,
+      "eval_runtime": 55.7487,
+      "eval_samples_per_second": 311.451,
+      "eval_steps_per_second": 7.085,
+      "step": 34800
+    },
+    {
+      "epoch": 90.0,
+      "grad_norm": 0.00013569927250500768,
+      "learning_rate": 1.0045977011494254e-06,
+      "loss": 0.0596,
+      "step": 39150
+    },
+    {
+      "epoch": 90.0,
+      "eval_loss": 2.895709276199341,
+      "eval_runtime": 55.8728,
+      "eval_samples_per_second": 310.76,
+      "eval_steps_per_second": 7.07,
+      "step": 39150
+    },
+    {
+      "epoch": 100.0,
+      "grad_norm": 0.0004280584107618779,
+      "learning_rate": 4.827586206896552e-09,
+      "loss": 0.0576,
+      "step": 43500
+    },
+    {
+      "epoch": 100.0,
+      "eval_loss": 2.854401111602783,
+      "eval_runtime": 55.466,
+      "eval_samples_per_second": 313.038,
+      "eval_steps_per_second": 7.121,
+      "step": 43500
+    },
+    {
+      "epoch": 100.0,
+      "step": 43500,
+      "total_flos": 6.859464121840128e+17,
+      "train_loss": 0.11642176906541846,
+      "train_runtime": 15360.6557,
+      "train_samples_per_second": 113.036,
+      "train_steps_per_second": 2.832
+    }
+  ],
+  "logging_steps": 4350,
+  "max_steps": 43500,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 100,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 6.859464121840128e+17,
+  "train_batch_size": 40,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:672ded44c894888ffb4d53a171411f24e6199dbc2ce094390f484b37111e6270
 size 5112

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d0c33571f676a741e7e4489b9403c96f61708c4e1bb8dfa823d6fe0757fa09b
 size 5112

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff