Instructions to use JulesGo/vit_focus with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use JulesGo/vit_focus with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("JulesGo/vit_focus", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_global_step": 120, | |
| "best_metric": 0.12533096969127655, | |
| "best_model_checkpoint": "./vit_focus/checkpoint-120", | |
| "epoch": 27.0, | |
| "eval_steps": 500, | |
| "global_step": 270, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.0993029847741127, | |
| "eval_mae": 0.3380415141582489, | |
| "eval_mse": 0.15286438167095184, | |
| "eval_runtime": 9.9578, | |
| "eval_samples_per_second": 7.632, | |
| "eval_steps_per_second": 1.004, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.1050349548459053, | |
| "eval_mae": 0.34093156456947327, | |
| "eval_mse": 0.15535660088062286, | |
| "eval_runtime": 9.7265, | |
| "eval_samples_per_second": 7.814, | |
| "eval_steps_per_second": 1.028, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 0.09966066479682922, | |
| "eval_mae": 0.3352396786212921, | |
| "eval_mse": 0.14932329952716827, | |
| "eval_runtime": 9.4644, | |
| "eval_samples_per_second": 8.03, | |
| "eval_steps_per_second": 1.057, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 6.926674842834473, | |
| "learning_rate": 4.277777777777778e-05, | |
| "loss": 0.313, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_loss": 0.06556536257266998, | |
| "eval_mae": 0.3157392740249634, | |
| "eval_mse": 0.13447947800159454, | |
| "eval_runtime": 10.1308, | |
| "eval_samples_per_second": 7.502, | |
| "eval_steps_per_second": 0.987, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_loss": 0.06592569500207901, | |
| "eval_mae": 0.3202681839466095, | |
| "eval_mse": 0.13659903407096863, | |
| "eval_runtime": 9.7043, | |
| "eval_samples_per_second": 7.832, | |
| "eval_steps_per_second": 1.03, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_loss": 0.0638844296336174, | |
| "eval_mae": 0.31192123889923096, | |
| "eval_mse": 0.12961846590042114, | |
| "eval_runtime": 9.5179, | |
| "eval_samples_per_second": 7.985, | |
| "eval_steps_per_second": 1.051, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 0.06389027088880539, | |
| "eval_mae": 0.3178236484527588, | |
| "eval_mse": 0.13510307669639587, | |
| "eval_runtime": 9.5025, | |
| "eval_samples_per_second": 7.998, | |
| "eval_steps_per_second": 1.052, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 4.451300144195557, | |
| "learning_rate": 3.537037037037037e-05, | |
| "loss": 0.1742, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_loss": 0.06391099840402603, | |
| "eval_mae": 0.3085971772670746, | |
| "eval_mse": 0.12736700475215912, | |
| "eval_runtime": 10.1592, | |
| "eval_samples_per_second": 7.481, | |
| "eval_steps_per_second": 0.984, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_loss": 0.07279632240533829, | |
| "eval_mae": 0.3096161186695099, | |
| "eval_mse": 0.12943950295448303, | |
| "eval_runtime": 9.4821, | |
| "eval_samples_per_second": 8.015, | |
| "eval_steps_per_second": 1.055, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_loss": 0.06712160259485245, | |
| "eval_mae": 0.3150458335876465, | |
| "eval_mse": 0.13300836086273193, | |
| "eval_runtime": 9.7046, | |
| "eval_samples_per_second": 7.831, | |
| "eval_steps_per_second": 1.03, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_loss": 0.06695493310689926, | |
| "eval_mae": 0.30665045976638794, | |
| "eval_mse": 0.12600918114185333, | |
| "eval_runtime": 9.7852, | |
| "eval_samples_per_second": 7.767, | |
| "eval_steps_per_second": 1.022, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 2.9957473278045654, | |
| "learning_rate": 2.7962962962962965e-05, | |
| "loss": 0.1284, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_loss": 0.06580791622400284, | |
| "eval_mae": 0.3059428930282593, | |
| "eval_mse": 0.12533096969127655, | |
| "eval_runtime": 9.7135, | |
| "eval_samples_per_second": 7.824, | |
| "eval_steps_per_second": 1.029, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_loss": 0.06405826658010483, | |
| "eval_mae": 0.3104270100593567, | |
| "eval_mse": 0.1280805468559265, | |
| "eval_runtime": 12.0741, | |
| "eval_samples_per_second": 6.294, | |
| "eval_steps_per_second": 0.828, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_loss": 0.06428611278533936, | |
| "eval_mae": 0.3104848563671112, | |
| "eval_mse": 0.12893278896808624, | |
| "eval_runtime": 9.5891, | |
| "eval_samples_per_second": 7.926, | |
| "eval_steps_per_second": 1.043, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_loss": 0.06487523764371872, | |
| "eval_mae": 0.3171584904193878, | |
| "eval_mse": 0.13420797884464264, | |
| "eval_runtime": 9.5632, | |
| "eval_samples_per_second": 7.947, | |
| "eval_steps_per_second": 1.046, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "grad_norm": 1.922245740890503, | |
| "learning_rate": 2.0555555555555555e-05, | |
| "loss": 0.0981, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_loss": 0.06558659672737122, | |
| "eval_mae": 0.30849871039390564, | |
| "eval_mse": 0.12756428122520447, | |
| "eval_runtime": 9.5905, | |
| "eval_samples_per_second": 7.924, | |
| "eval_steps_per_second": 1.043, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_loss": 0.06274469941854477, | |
| "eval_mae": 0.3136182427406311, | |
| "eval_mse": 0.13160544633865356, | |
| "eval_runtime": 10.0109, | |
| "eval_samples_per_second": 7.592, | |
| "eval_steps_per_second": 0.999, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_loss": 0.06201491877436638, | |
| "eval_mae": 0.3168633282184601, | |
| "eval_mse": 0.1343080997467041, | |
| "eval_runtime": 9.9918, | |
| "eval_samples_per_second": 7.606, | |
| "eval_steps_per_second": 1.001, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_loss": 0.0631915032863617, | |
| "eval_mae": 0.31292420625686646, | |
| "eval_mse": 0.13110676407814026, | |
| "eval_runtime": 9.5351, | |
| "eval_samples_per_second": 7.971, | |
| "eval_steps_per_second": 1.049, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 1.9687647819519043, | |
| "learning_rate": 1.3148148148148148e-05, | |
| "loss": 0.0767, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_loss": 0.06296339631080627, | |
| "eval_mae": 0.3142727017402649, | |
| "eval_mse": 0.1326274573802948, | |
| "eval_runtime": 9.7999, | |
| "eval_samples_per_second": 7.755, | |
| "eval_steps_per_second": 1.02, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_loss": 0.06408733129501343, | |
| "eval_mae": 0.311717689037323, | |
| "eval_mse": 0.12986762821674347, | |
| "eval_runtime": 9.6462, | |
| "eval_samples_per_second": 7.879, | |
| "eval_steps_per_second": 1.037, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_loss": 0.06340750306844711, | |
| "eval_mae": 0.3114081621170044, | |
| "eval_mse": 0.12940751016139984, | |
| "eval_runtime": 9.5394, | |
| "eval_samples_per_second": 7.967, | |
| "eval_steps_per_second": 1.048, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_loss": 0.06285858899354935, | |
| "eval_mae": 0.31304195523262024, | |
| "eval_mse": 0.13149800896644592, | |
| "eval_runtime": 9.8923, | |
| "eval_samples_per_second": 7.683, | |
| "eval_steps_per_second": 1.011, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "grad_norm": 1.0159116983413696, | |
| "learning_rate": 5.740740740740741e-06, | |
| "loss": 0.0615, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_loss": 0.06115531921386719, | |
| "eval_mae": 0.3123721480369568, | |
| "eval_mse": 0.13078482449054718, | |
| "eval_runtime": 9.6638, | |
| "eval_samples_per_second": 7.864, | |
| "eval_steps_per_second": 1.035, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_loss": 0.059913910925388336, | |
| "eval_mae": 0.31175902485847473, | |
| "eval_mse": 0.13015513122081757, | |
| "eval_runtime": 9.6921, | |
| "eval_samples_per_second": 7.841, | |
| "eval_steps_per_second": 1.032, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_loss": 0.06085545942187309, | |
| "eval_mae": 0.313151478767395, | |
| "eval_mse": 0.13129989802837372, | |
| "eval_runtime": 9.5449, | |
| "eval_samples_per_second": 7.962, | |
| "eval_steps_per_second": 1.048, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_loss": 0.060885023325681686, | |
| "eval_mae": 0.3116842210292816, | |
| "eval_mse": 0.13011318445205688, | |
| "eval_runtime": 9.6423, | |
| "eval_samples_per_second": 7.882, | |
| "eval_steps_per_second": 1.037, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "step": 270, | |
| "total_flos": 0.0, | |
| "train_loss": 0.13240765200720894, | |
| "train_runtime": 1534.1197, | |
| "train_samples_per_second": 5.925, | |
| "train_steps_per_second": 0.176 | |
| } | |
| ], | |
| "logging_steps": 40, | |
| "max_steps": 270, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 30, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |