marcelovidigal committed on
Commit
44664b1
1 Parent(s): 3c72947

Training in progress, epoch 47

Browse files
.gitattributes CHANGED
@@ -34,3 +34,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  wandb/run-20240924_172630-x9iddikd/run-x9iddikd.wandb filter=lfs diff=lfs merge=lfs -text
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  wandb/run-20240924_172630-x9iddikd/run-x9iddikd.wandb filter=lfs diff=lfs merge=lfs -text
37
+ wandb/debug-internal.log filter=lfs diff=lfs merge=lfs -text
38
+ wandb/run-20240924_172630-x9iddikd/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1144d9e6d049aeac81feb9696eaad5d7bc856258d669fbe1084546edbaa18b60
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3335c1da6b2d61ae7ee30323d9e4f65bc16b91baaa8c2627dfcff66458e70e02
3
  size 267832560
wandb/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240924_172630-x9iddikd/files/output.log CHANGED
@@ -68,3 +68,24 @@ wandb: Network error (SSLError), entering retry loop.
68
  {'loss': 0.0101, 'grad_norm': 0.0740148276090622, 'learning_rate': 4.4e-06, 'epoch': 28.0}
69
  {'eval_loss': 0.7997801303863525, 'eval_accuracy': 0.897, 'eval_runtime': 38.1717, 'eval_samples_per_second': 26.197, 'eval_steps_per_second': 0.838, 'epoch': 28.0}
70
  {'eval_loss': 0.7122868895530701, 'eval_accuracy': 0.903, 'eval_runtime': 38.0461, 'eval_samples_per_second': 26.284, 'eval_steps_per_second': 0.841, 'epoch': 29.0}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  {'loss': 0.0101, 'grad_norm': 0.0740148276090622, 'learning_rate': 4.4e-06, 'epoch': 28.0}
69
  {'eval_loss': 0.7997801303863525, 'eval_accuracy': 0.897, 'eval_runtime': 38.1717, 'eval_samples_per_second': 26.197, 'eval_steps_per_second': 0.838, 'epoch': 28.0}
70
  {'eval_loss': 0.7122868895530701, 'eval_accuracy': 0.903, 'eval_runtime': 38.0461, 'eval_samples_per_second': 26.284, 'eval_steps_per_second': 0.841, 'epoch': 29.0}
71
+ {'eval_loss': 0.7891318798065186, 'eval_accuracy': 0.9, 'eval_runtime': 37.564, 'eval_samples_per_second': 26.621, 'eval_steps_per_second': 0.852, 'epoch': 30.0}
72
+ {'eval_loss': 0.6890597343444824, 'eval_accuracy': 0.903, 'eval_runtime': 51.1931, 'eval_samples_per_second': 19.534, 'eval_steps_per_second': 0.625, 'epoch': 31.0}
73
+ {'loss': 0.0089, 'grad_norm': 0.007422878406941891, 'learning_rate': 3.6000000000000003e-06, 'epoch': 32.0}
74
+ {'eval_loss': 0.6430081129074097, 'eval_accuracy': 0.912, 'eval_runtime': 38.3463, 'eval_samples_per_second': 26.078, 'eval_steps_per_second': 0.835, 'epoch': 32.0}
75
+ {'eval_loss': 0.6644126176834106, 'eval_accuracy': 0.912, 'eval_runtime': 37.0865, 'eval_samples_per_second': 26.964, 'eval_steps_per_second': 0.863, 'epoch': 33.0}
76
+ {'eval_loss': 0.6276940703392029, 'eval_accuracy': 0.914, 'eval_runtime': 37.8123, 'eval_samples_per_second': 26.446, 'eval_steps_per_second': 0.846, 'epoch': 34.0}
77
+ {'eval_loss': 0.6321740746498108, 'eval_accuracy': 0.917, 'eval_runtime': 50.725, 'eval_samples_per_second': 19.714, 'eval_steps_per_second': 0.631, 'epoch': 35.0}
78
+ {'loss': 0.0078, 'grad_norm': 0.00712945219129324, 'learning_rate': 2.8000000000000003e-06, 'epoch': 36.0}
79
+ {'eval_loss': 0.7095584869384766, 'eval_accuracy': 0.908, 'eval_runtime': 37.1239, 'eval_samples_per_second': 26.937, 'eval_steps_per_second': 0.862, 'epoch': 36.0}
80
+ {'eval_loss': 0.649186909198761, 'eval_accuracy': 0.911, 'eval_runtime': 37.5593, 'eval_samples_per_second': 26.625, 'eval_steps_per_second': 0.852, 'epoch': 37.0}
81
+ {'eval_loss': 0.6124615669250488, 'eval_accuracy': 0.915, 'eval_runtime': 41.4101, 'eval_samples_per_second': 24.149, 'eval_steps_per_second': 0.773, 'epoch': 38.0}
82
+ {'eval_loss': 0.7363823056221008, 'eval_accuracy': 0.904, 'eval_runtime': 47.3084, 'eval_samples_per_second': 21.138, 'eval_steps_per_second': 0.676, 'epoch': 39.0}
83
+ {'loss': 0.0054, 'grad_norm': 0.013953677378594875, 'learning_rate': 2.0000000000000003e-06, 'epoch': 40.0}
84
+ {'eval_loss': 0.6578059196472168, 'eval_accuracy': 0.913, 'eval_runtime': 37.731, 'eval_samples_per_second': 26.503, 'eval_steps_per_second': 0.848, 'epoch': 40.0}
85
+ {'eval_loss': 0.7589854598045349, 'eval_accuracy': 0.906, 'eval_runtime': 37.3152, 'eval_samples_per_second': 26.799, 'eval_steps_per_second': 0.858, 'epoch': 41.0}
86
+ {'eval_loss': 0.7142490744590759, 'eval_accuracy': 0.906, 'eval_runtime': 37.6936, 'eval_samples_per_second': 26.53, 'eval_steps_per_second': 0.849, 'epoch': 42.0}
87
+ {'eval_loss': 0.759125292301178, 'eval_accuracy': 0.903, 'eval_runtime': 37.463, 'eval_samples_per_second': 26.693, 'eval_steps_per_second': 0.854, 'epoch': 43.0}
88
+ {'loss': 0.0049, 'grad_norm': 0.007690785452723503, 'learning_rate': 1.2000000000000002e-06, 'epoch': 44.0}
89
+ {'eval_loss': 0.6526206731796265, 'eval_accuracy': 0.917, 'eval_runtime': 37.8543, 'eval_samples_per_second': 26.417, 'eval_steps_per_second': 0.845, 'epoch': 44.0}
90
+ {'eval_loss': 0.6948218941688538, 'eval_accuracy': 0.909, 'eval_runtime': 37.9107, 'eval_samples_per_second': 26.378, 'eval_steps_per_second': 0.844, 'epoch': 45.0}
91
+ {'eval_loss': 0.7213398218154907, 'eval_accuracy': 0.907, 'eval_runtime': 38.4455, 'eval_samples_per_second': 26.011, 'eval_steps_per_second': 0.832, 'epoch': 46.0}
wandb/run-20240924_172630-x9iddikd/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"eval/loss": 0.7891318798065186, "eval/accuracy": 0.9, "eval/runtime": 37.564, "eval/samples_per_second": 26.621, "eval/steps_per_second": 0.852, "train/epoch": 30.0, "train/global_step": 3750, "_timestamp": 1727274406.976772, "_runtime": 64816.10386300087, "_step": 44, "train/loss": 0.0101, "train/grad_norm": 0.0740148276090622, "train/learning_rate": 4.4e-06, "train_runtime": 8026.8642, "train_samples_per_second": 2.492, "train_steps_per_second": 0.156, "total_flos": 2396475988298112.0, "train_loss": 0.11480112991333008}
 
1
+ {"eval/loss": 0.6751002669334412, "eval/accuracy": 0.913, "eval/runtime": 38.6152, "eval/samples_per_second": 25.897, "eval/steps_per_second": 0.829, "train/epoch": 47.0, "train/global_step": 5875, "_timestamp": 1727305060.453245, "_runtime": 95469.58033585548, "_step": 65, "train/loss": 0.0049, "train/grad_norm": 0.007690785452723503, "train/learning_rate": 1.2000000000000002e-06, "train_runtime": 8026.8642, "train_samples_per_second": 2.492, "train_steps_per_second": 0.156, "total_flos": 2396475988298112.0, "train_loss": 0.11480112991333008}
wandb/run-20240924_172630-x9iddikd/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240924_172630-x9iddikd/run-x9iddikd.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4178b68a90e05a401884b180ae831e5b396871ea4f4da50ef7838b84d97f3b87
3
- size 1769746
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a52498c342f7cb3cb48ad2f5a25f3615353f63cc594a247e338f844424f16a8d
3
+ size 2600614