Translation
Transformers
PyTorch
English
Romanian
mt5
text2text-generation
wmt16
Lvxue
Eval Results (legacy)
Instructions to use Lvxue/finetuned-mt5-small-10epoch with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Lvxue/finetuned-mt5-small-10epoch with Transformers:
```python
# Use a pipeline as a high-level helper
# Warning: Pipeline type "translation" is no longer supported in transformers v5.
# You must load the model directly (see below) or downgrade to v4.x with:
# 'pip install "transformers<5.0.0"'
from transformers import pipeline

pipe = pipeline("translation", model="Lvxue/finetuned-mt5-small-10epoch")
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("Lvxue/finetuned-mt5-small-10epoch")
model = AutoModelForSeq2SeqLM.from_pretrained("Lvxue/finetuned-mt5-small-10epoch")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 10.0, | |
| "global_step": 127150, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.980338183248132e-05, | |
| "loss": 8.0518, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.9606763664962646e-05, | |
| "loss": 3.358, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.9410145497443964e-05, | |
| "loss": 2.9014, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.921352732992528e-05, | |
| "loss": 2.6617, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.901690916240661e-05, | |
| "loss": 2.4945, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.882029099488793e-05, | |
| "loss": 2.3686, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.862367282736925e-05, | |
| "loss": 2.2674, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.8427054659850575e-05, | |
| "loss": 2.1889, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.8230436492331893e-05, | |
| "loss": 2.1107, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.803381832481321e-05, | |
| "loss": 2.0427, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 4.783720015729454e-05, | |
| "loss": 1.9907, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.7640581989775855e-05, | |
| "loss": 1.9401, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.744396382225718e-05, | |
| "loss": 1.9027, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.72473456547385e-05, | |
| "loss": 1.8655, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.705072748721982e-05, | |
| "loss": 1.823, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 4.685410931970115e-05, | |
| "loss": 1.7995, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 4.6657491152182466e-05, | |
| "loss": 1.7682, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.6460872984663785e-05, | |
| "loss": 1.7367, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 4.626425481714511e-05, | |
| "loss": 1.7111, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 4.606763664962643e-05, | |
| "loss": 1.6903, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.5871018482107746e-05, | |
| "loss": 1.6655, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.567440031458907e-05, | |
| "loss": 1.6468, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.547778214707039e-05, | |
| "loss": 1.6196, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.528116397955171e-05, | |
| "loss": 1.6017, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.508454581203303e-05, | |
| "loss": 1.5889, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.488792764451436e-05, | |
| "loss": 1.5679, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 4.4691309476995676e-05, | |
| "loss": 1.5419, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.4494691309477e-05, | |
| "loss": 1.5339, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.429807314195832e-05, | |
| "loss": 1.5189, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 4.410145497443964e-05, | |
| "loss": 1.5071, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.390483680692096e-05, | |
| "loss": 1.4936, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.370821863940228e-05, | |
| "loss": 1.4849, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 4.3511600471883605e-05, | |
| "loss": 1.4671, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 4.3314982304364923e-05, | |
| "loss": 1.4519, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.311836413684625e-05, | |
| "loss": 1.4411, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 4.2921745969327573e-05, | |
| "loss": 1.4355, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 4.272512780180889e-05, | |
| "loss": 1.4224, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 4.252850963429021e-05, | |
| "loss": 1.4221, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 4.2331891466771535e-05, | |
| "loss": 1.4053, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 4.213527329925285e-05, | |
| "loss": 1.394, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 4.193865513173417e-05, | |
| "loss": 1.3904, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 4.1742036964215496e-05, | |
| "loss": 1.377, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 4.1545418796696815e-05, | |
| "loss": 1.3735, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 4.134880062917813e-05, | |
| "loss": 1.364, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 4.115218246165946e-05, | |
| "loss": 1.3605, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 4.095556429414078e-05, | |
| "loss": 1.3539, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 4.07589461266221e-05, | |
| "loss": 1.3397, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 4.0562327959103426e-05, | |
| "loss": 1.3326, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 4.0365709791584744e-05, | |
| "loss": 1.3295, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 4.016909162406606e-05, | |
| "loss": 1.3297, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 3.997247345654739e-05, | |
| "loss": 1.312, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.9775855289028706e-05, | |
| "loss": 1.2981, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 3.957923712151003e-05, | |
| "loss": 1.2988, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 3.938261895399135e-05, | |
| "loss": 1.289, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.9186000786472674e-05, | |
| "loss": 1.2861, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 3.8989382618954e-05, | |
| "loss": 1.2823, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.879276445143532e-05, | |
| "loss": 1.2718, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.8596146283916635e-05, | |
| "loss": 1.2673, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 3.839952811639796e-05, | |
| "loss": 1.2672, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 3.820290994887928e-05, | |
| "loss": 1.2538, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.80062917813606e-05, | |
| "loss": 1.2533, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 3.780967361384192e-05, | |
| "loss": 1.2519, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.761305544632324e-05, | |
| "loss": 1.2437, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.741643727880456e-05, | |
| "loss": 1.2363, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 3.721981911128588e-05, | |
| "loss": 1.2397, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 3.702320094376721e-05, | |
| "loss": 1.2328, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 3.6826582776248526e-05, | |
| "loss": 1.2336, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 3.662996460872985e-05, | |
| "loss": 1.2271, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.643334644121117e-05, | |
| "loss": 1.2199, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 3.623672827369249e-05, | |
| "loss": 1.2167, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.604011010617381e-05, | |
| "loss": 1.2115, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 3.584349193865513e-05, | |
| "loss": 1.2095, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 3.564687377113645e-05, | |
| "loss": 1.2043, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 3.5450255603617774e-05, | |
| "loss": 1.2029, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.52536374360991e-05, | |
| "loss": 1.1952, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 3.5057019268580424e-05, | |
| "loss": 1.1916, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 3.486040110106174e-05, | |
| "loss": 1.183, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 3.466378293354306e-05, | |
| "loss": 1.1823, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 3.4467164766024386e-05, | |
| "loss": 1.1806, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 3.4270546598505704e-05, | |
| "loss": 1.182, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.19, | |
| "learning_rate": 3.407392843098702e-05, | |
| "loss": 1.1705, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 3.387731026346835e-05, | |
| "loss": 1.174, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 3.3680692095949665e-05, | |
| "loss": 1.1634, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 3.3484073928430984e-05, | |
| "loss": 1.1655, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 3.328745576091231e-05, | |
| "loss": 1.1622, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 3.3090837593393633e-05, | |
| "loss": 1.161, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 3.289421942587495e-05, | |
| "loss": 1.1495, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 3.269760125835628e-05, | |
| "loss": 1.1522, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 3.2500983090837595e-05, | |
| "loss": 1.1483, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 3.230436492331891e-05, | |
| "loss": 1.1516, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 3.58, | |
| "learning_rate": 3.210774675580024e-05, | |
| "loss": 1.1475, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 3.1911128588281556e-05, | |
| "loss": 1.1386, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 3.1714510420762875e-05, | |
| "loss": 1.142, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 3.15178922532442e-05, | |
| "loss": 1.1403, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 3.1321274085725525e-05, | |
| "loss": 1.1328, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 3.112465591820685e-05, | |
| "loss": 1.1257, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 3.092803775068817e-05, | |
| "loss": 1.1259, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 3.0731419583169486e-05, | |
| "loss": 1.128, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 3.053480141565081e-05, | |
| "loss": 1.1299, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 3.033818324813213e-05, | |
| "loss": 1.1285, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 3.0141565080613447e-05, | |
| "loss": 1.1224, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 2.9944946913094772e-05, | |
| "loss": 1.116, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 2.9748328745576094e-05, | |
| "loss": 1.1125, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 2.9551710578057412e-05, | |
| "loss": 1.1031, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 2.9355092410538737e-05, | |
| "loss": 1.1091, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 2.9158474243020055e-05, | |
| "loss": 1.1058, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 2.8961856075501377e-05, | |
| "loss": 1.1025, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 4.25, | |
| "learning_rate": 2.8765237907982702e-05, | |
| "loss": 1.1076, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 2.856861974046402e-05, | |
| "loss": 1.1052, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 2.837200157294534e-05, | |
| "loss": 1.0995, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 2.8175383405426664e-05, | |
| "loss": 1.0948, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 2.7978765237907985e-05, | |
| "loss": 1.0985, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 2.7782147070389303e-05, | |
| "loss": 1.0969, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 2.758552890287063e-05, | |
| "loss": 1.0921, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 2.7388910735351947e-05, | |
| "loss": 1.0913, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 4.56, | |
| "learning_rate": 2.719229256783327e-05, | |
| "loss": 1.0883, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 4.6, | |
| "learning_rate": 2.699567440031459e-05, | |
| "loss": 1.0859, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 4.64, | |
| "learning_rate": 2.679905623279591e-05, | |
| "loss": 1.0898, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 2.6602438065277236e-05, | |
| "loss": 1.0809, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 2.6405819897758555e-05, | |
| "loss": 1.084, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 2.6209201730239873e-05, | |
| "loss": 1.0767, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 2.6012583562721198e-05, | |
| "loss": 1.0709, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 2.581596539520252e-05, | |
| "loss": 1.0789, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 4.88, | |
| "learning_rate": 2.5619347227683838e-05, | |
| "loss": 1.0779, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 2.5422729060165163e-05, | |
| "loss": 1.0732, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 2.522611089264648e-05, | |
| "loss": 1.0718, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 2.5029492725127802e-05, | |
| "loss": 1.0665, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 5.03, | |
| "learning_rate": 2.4832874557609124e-05, | |
| "loss": 1.0673, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 2.4636256390090446e-05, | |
| "loss": 1.0603, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 2.4439638222571767e-05, | |
| "loss": 1.0557, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "learning_rate": 2.4243020055053085e-05, | |
| "loss": 1.0563, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 5.19, | |
| "learning_rate": 2.404640188753441e-05, | |
| "loss": 1.0557, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 2.3849783720015732e-05, | |
| "loss": 1.0586, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 5.27, | |
| "learning_rate": 2.3653165552497054e-05, | |
| "loss": 1.0574, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 5.31, | |
| "learning_rate": 2.3456547384978372e-05, | |
| "loss": 1.0578, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 2.3259929217459694e-05, | |
| "loss": 1.0569, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 5.39, | |
| "learning_rate": 2.3063311049941015e-05, | |
| "loss": 1.0541, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 2.2866692882422337e-05, | |
| "loss": 1.0532, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 2.267007471490366e-05, | |
| "loss": 1.0538, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 2.247345654738498e-05, | |
| "loss": 1.0467, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 2.2276838379866298e-05, | |
| "loss": 1.0483, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 5.58, | |
| "learning_rate": 2.2080220212347623e-05, | |
| "loss": 1.0565, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 5.62, | |
| "learning_rate": 2.1883602044828945e-05, | |
| "loss": 1.0494, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 2.1686983877310266e-05, | |
| "loss": 1.0423, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 2.1490365709791585e-05, | |
| "loss": 1.0449, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 2.1293747542272906e-05, | |
| "loss": 1.0447, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 2.1097129374754228e-05, | |
| "loss": 1.0411, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 5.82, | |
| "learning_rate": 2.090051120723555e-05, | |
| "loss": 1.0424, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 5.86, | |
| "learning_rate": 2.070389303971687e-05, | |
| "loss": 1.0394, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 2.0507274872198193e-05, | |
| "loss": 1.035, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 5.94, | |
| "learning_rate": 2.031065670467951e-05, | |
| "loss": 1.0343, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 2.0114038537160836e-05, | |
| "loss": 1.0371, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 6.02, | |
| "learning_rate": 1.9917420369642157e-05, | |
| "loss": 1.0269, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 1.972080220212348e-05, | |
| "loss": 1.0361, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 6.1, | |
| "learning_rate": 1.9524184034604797e-05, | |
| "loss": 1.0291, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 1.932756586708612e-05, | |
| "loss": 1.0275, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 1.913094769956744e-05, | |
| "loss": 1.0308, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 1.8934329532048762e-05, | |
| "loss": 1.0259, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 1.8737711364530084e-05, | |
| "loss": 1.0238, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 6.29, | |
| "learning_rate": 1.8541093197011405e-05, | |
| "loss": 1.0263, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 6.33, | |
| "learning_rate": 1.8344475029492724e-05, | |
| "loss": 1.024, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 1.814785686197405e-05, | |
| "loss": 1.0217, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 1.795123869445537e-05, | |
| "loss": 1.0229, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 1.7754620526936692e-05, | |
| "loss": 1.0211, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 1.755800235941801e-05, | |
| "loss": 1.0143, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 6.53, | |
| "learning_rate": 1.736138419189933e-05, | |
| "loss": 1.0234, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 1.7164766024380653e-05, | |
| "loss": 1.0182, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 1.6968147856861975e-05, | |
| "loss": 1.0163, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 6.65, | |
| "learning_rate": 1.6771529689343296e-05, | |
| "loss": 1.0143, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 1.6574911521824618e-05, | |
| "loss": 1.0184, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 6.72, | |
| "learning_rate": 1.6378293354305936e-05, | |
| "loss": 1.0183, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 1.618167518678726e-05, | |
| "loss": 1.0174, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 1.5985057019268583e-05, | |
| "loss": 1.0151, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 6.84, | |
| "learning_rate": 1.5788438851749904e-05, | |
| "loss": 1.0116, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 6.88, | |
| "learning_rate": 1.5591820684231223e-05, | |
| "loss": 1.0114, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 1.5395202516712544e-05, | |
| "loss": 1.0144, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 1.5198584349193868e-05, | |
| "loss": 1.0106, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.5001966181675187e-05, | |
| "loss": 1.0196, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 1.4805348014156509e-05, | |
| "loss": 1.0041, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 7.08, | |
| "learning_rate": 1.460872984663783e-05, | |
| "loss": 1.0061, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 1.441211167911915e-05, | |
| "loss": 1.0013, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 7.16, | |
| "learning_rate": 1.4215493511600472e-05, | |
| "loss": 1.0085, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 1.4018875344081794e-05, | |
| "loss": 1.0028, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 7.24, | |
| "learning_rate": 1.3822257176563117e-05, | |
| "loss": 1.0028, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 1.3625639009044435e-05, | |
| "loss": 0.9988, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 7.31, | |
| "learning_rate": 1.3429020841525759e-05, | |
| "loss": 1.0067, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 7.35, | |
| "learning_rate": 1.323240267400708e-05, | |
| "loss": 1.0072, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 1.30357845064884e-05, | |
| "loss": 1.0001, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 1.2839166338969722e-05, | |
| "loss": 1.0012, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 7.47, | |
| "learning_rate": 1.2642548171451043e-05, | |
| "loss": 1.0003, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 7.51, | |
| "learning_rate": 1.2445930003932365e-05, | |
| "loss": 0.9877, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 7.55, | |
| "learning_rate": 1.2249311836413685e-05, | |
| "loss": 1.0001, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 1.2052693668895006e-05, | |
| "loss": 0.9954, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 7.63, | |
| "learning_rate": 1.1856075501376328e-05, | |
| "loss": 0.9963, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 1.1659457333857648e-05, | |
| "loss": 0.994, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 1.1462839166338971e-05, | |
| "loss": 1.003, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 7.75, | |
| "learning_rate": 1.1266220998820291e-05, | |
| "loss": 0.9931, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 7.79, | |
| "learning_rate": 1.1069602831301613e-05, | |
| "loss": 0.9983, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 1.0872984663782934e-05, | |
| "loss": 0.9922, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 7.86, | |
| "learning_rate": 1.0676366496264254e-05, | |
| "loss": 0.9968, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 7.9, | |
| "learning_rate": 1.0479748328745578e-05, | |
| "loss": 0.9956, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 1.0283130161226898e-05, | |
| "loss": 0.991, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 7.98, | |
| "learning_rate": 1.008651199370822e-05, | |
| "loss": 0.9915, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 8.02, | |
| "learning_rate": 9.88989382618954e-06, | |
| "loss": 0.9859, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 8.06, | |
| "learning_rate": 9.69327565867086e-06, | |
| "loss": 0.986, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 8.1, | |
| "learning_rate": 9.496657491152184e-06, | |
| "loss": 0.9937, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 9.300039323633504e-06, | |
| "loss": 0.9863, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 8.18, | |
| "learning_rate": 9.103421156114826e-06, | |
| "loss": 0.9836, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 8.906802988596147e-06, | |
| "loss": 0.9857, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 8.710184821077467e-06, | |
| "loss": 0.989, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 8.3, | |
| "learning_rate": 8.51356665355879e-06, | |
| "loss": 0.9886, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 8.34, | |
| "learning_rate": 8.31694848604011e-06, | |
| "loss": 0.9885, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 8.120330318521432e-06, | |
| "loss": 0.9847, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 8.42, | |
| "learning_rate": 7.923712151002753e-06, | |
| "loss": 0.9846, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 7.727093983484073e-06, | |
| "loss": 0.9882, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 8.49, | |
| "learning_rate": 7.530475815965396e-06, | |
| "loss": 0.9866, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 8.53, | |
| "learning_rate": 7.333857648446717e-06, | |
| "loss": 0.9823, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 8.57, | |
| "learning_rate": 7.137239480928037e-06, | |
| "loss": 0.9841, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 6.94062131340936e-06, | |
| "loss": 0.9844, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "learning_rate": 6.7440031458906806e-06, | |
| "loss": 0.9817, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 6.547384978372002e-06, | |
| "loss": 0.9854, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 6.350766810853323e-06, | |
| "loss": 0.9845, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "learning_rate": 6.1541486433346445e-06, | |
| "loss": 0.984, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 8.81, | |
| "learning_rate": 5.957530475815966e-06, | |
| "loss": 0.9865, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 5.760912308297287e-06, | |
| "loss": 0.9765, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 5.564294140778608e-06, | |
| "loss": 0.9867, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 8.93, | |
| "learning_rate": 5.367675973259929e-06, | |
| "loss": 0.986, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 5.171057805741251e-06, | |
| "loss": 0.9817, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 4.9744396382225725e-06, | |
| "loss": 0.9774, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "learning_rate": 4.777821470703893e-06, | |
| "loss": 0.9811, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 9.08, | |
| "learning_rate": 4.581203303185214e-06, | |
| "loss": 0.9759, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 9.12, | |
| "learning_rate": 4.384585135666536e-06, | |
| "loss": 0.9769, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 4.187966968147857e-06, | |
| "loss": 0.9807, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 3.991348800629179e-06, | |
| "loss": 0.9822, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "learning_rate": 3.794730633110499e-06, | |
| "loss": 0.9843, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 9.28, | |
| "learning_rate": 3.5981124655918208e-06, | |
| "loss": 0.9777, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 3.401494298073142e-06, | |
| "loss": 0.977, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 9.36, | |
| "learning_rate": 3.2048761305544636e-06, | |
| "loss": 0.9785, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 9.4, | |
| "learning_rate": 3.0082579630357847e-06, | |
| "loss": 0.9756, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 2.811639795517106e-06, | |
| "loss": 0.9803, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 9.48, | |
| "learning_rate": 2.615021627998427e-06, | |
| "loss": 0.9774, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 2.4184034604797483e-06, | |
| "loss": 0.984, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 9.56, | |
| "learning_rate": 2.22178529296107e-06, | |
| "loss": 0.9753, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 9.59, | |
| "learning_rate": 2.025167125442391e-06, | |
| "loss": 0.9764, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 1.8285489579237123e-06, | |
| "loss": 0.9756, | |
| "step": 122500 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 1.6319307904050335e-06, | |
| "loss": 0.9733, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 9.71, | |
| "learning_rate": 1.4353126228863549e-06, | |
| "loss": 0.9726, | |
| "step": 123500 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "learning_rate": 1.238694455367676e-06, | |
| "loss": 0.9783, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 9.79, | |
| "learning_rate": 1.0420762878489972e-06, | |
| "loss": 0.9786, | |
| "step": 124500 | |
| }, | |
| { | |
| "epoch": 9.83, | |
| "learning_rate": 8.454581203303186e-07, | |
| "loss": 0.9802, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 9.87, | |
| "learning_rate": 6.488399528116398e-07, | |
| "loss": 0.979, | |
| "step": 125500 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 4.522217852929611e-07, | |
| "loss": 0.978, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "learning_rate": 2.556036177742823e-07, | |
| "loss": 0.9718, | |
| "step": 126500 | |
| }, | |
| { | |
| "epoch": 9.99, | |
| "learning_rate": 5.898545025560362e-08, | |
| "loss": 0.9811, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 127150, | |
| "total_flos": 5.807966088205763e+17, | |
| "train_loss": 1.219492632603017, | |
| "train_runtime": 160361.9384, | |
| "train_samples_per_second": 38.059, | |
| "train_steps_per_second": 0.793 | |
| } | |
| ], | |
| "max_steps": 127150, | |
| "num_train_epochs": 10, | |
| "total_flos": 5.807966088205763e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |