{
  "best_metric": 0.7300029397010803,
  "best_model_checkpoint": "experts/mistralic-expert-15/checkpoint-1000",
  "epoch": 0.32859607327692436,
  "eval_steps": 200,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 0.0002,
      "loss": 0.8512,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0002,
      "loss": 0.7338,
      "step": 20
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0002,
      "loss": 0.7805,
      "step": 30
    },
    {
      "epoch": 0.01,
      "learning_rate": 0.0002,
      "loss": 0.7586,
      "step": 40
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0002,
      "loss": 0.7571,
      "step": 50
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0002,
      "loss": 0.8492,
      "step": 60
    },
    {
      "epoch": 0.02,
      "learning_rate": 0.0002,
      "loss": 0.8117,
      "step": 70
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0002,
      "loss": 0.7952,
      "step": 80
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0002,
      "loss": 0.8803,
      "step": 90
    },
    {
      "epoch": 0.03,
      "learning_rate": 0.0002,
      "loss": 0.8204,
      "step": 100
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0002,
      "loss": 0.786,
      "step": 110
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0002,
      "loss": 0.8291,
      "step": 120
    },
    {
      "epoch": 0.04,
      "learning_rate": 0.0002,
      "loss": 0.7895,
      "step": 130
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0002,
      "loss": 0.8262,
      "step": 140
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0002,
      "loss": 0.8268,
      "step": 150
    },
    {
      "epoch": 0.05,
      "learning_rate": 0.0002,
      "loss": 0.8834,
      "step": 160
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002,
      "loss": 0.7672,
      "step": 170
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002,
      "loss": 0.7865,
      "step": 180
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0002,
      "loss": 0.8137,
      "step": 190
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 0.7671,
      "step": 200
    },
    {
      "epoch": 0.07,
      "eval_loss": 0.7465175986289978,
      "eval_runtime": 133.0528,
      "eval_samples_per_second": 7.516,
      "eval_steps_per_second": 3.758,
      "step": 200
    },
    {
      "epoch": 0.07,
      "mmlu_eval_accuracy": 0.5902724593152844,
      "mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727,
      "mmlu_eval_accuracy_anatomy": 0.5,
      "mmlu_eval_accuracy_astronomy": 0.8125,
      "mmlu_eval_accuracy_business_ethics": 0.7272727272727273,
      "mmlu_eval_accuracy_clinical_knowledge": 0.5862068965517241,
      "mmlu_eval_accuracy_college_biology": 0.5,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.45454545454545453,
      "mmlu_eval_accuracy_college_medicine": 0.6363636363636364,
      "mmlu_eval_accuracy_college_physics": 0.5454545454545454,
      "mmlu_eval_accuracy_computer_security": 0.6363636363636364,
      "mmlu_eval_accuracy_conceptual_physics": 0.5,
      "mmlu_eval_accuracy_econometrics": 0.5,
      "mmlu_eval_accuracy_electrical_engineering": 0.5,
      "mmlu_eval_accuracy_elementary_mathematics": 0.43902439024390244,
      "mmlu_eval_accuracy_formal_logic": 0.07142857142857142,
      "mmlu_eval_accuracy_global_facts": 0.3,
      "mmlu_eval_accuracy_high_school_biology": 0.5625,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_european_history": 0.7222222222222222,
      "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.7142857142857143,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.5813953488372093,
      "mmlu_eval_accuracy_high_school_mathematics": 0.2413793103448276,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.6153846153846154,
      "mmlu_eval_accuracy_high_school_physics": 0.11764705882352941,
      "mmlu_eval_accuracy_high_school_psychology": 0.85,
      "mmlu_eval_accuracy_high_school_statistics": 0.43478260869565216,
      "mmlu_eval_accuracy_high_school_us_history": 0.7727272727272727,
      "mmlu_eval_accuracy_high_school_world_history": 0.6923076923076923,
      "mmlu_eval_accuracy_human_aging": 0.7391304347826086,
      "mmlu_eval_accuracy_human_sexuality": 0.5,
      "mmlu_eval_accuracy_international_law": 1.0,
      "mmlu_eval_accuracy_jurisprudence": 0.6363636363636364,
      "mmlu_eval_accuracy_logical_fallacies": 0.6666666666666666,
      "mmlu_eval_accuracy_machine_learning": 0.5454545454545454,
      "mmlu_eval_accuracy_management": 0.9090909090909091,
      "mmlu_eval_accuracy_marketing": 0.88,
      "mmlu_eval_accuracy_medical_genetics": 0.9090909090909091,
      "mmlu_eval_accuracy_miscellaneous": 0.7558139534883721,
      "mmlu_eval_accuracy_moral_disputes": 0.5526315789473685,
      "mmlu_eval_accuracy_moral_scenarios": 0.37,
      "mmlu_eval_accuracy_nutrition": 0.696969696969697,
      "mmlu_eval_accuracy_philosophy": 0.7647058823529411,
      "mmlu_eval_accuracy_prehistory": 0.5428571428571428,
      "mmlu_eval_accuracy_professional_accounting": 0.6129032258064516,
      "mmlu_eval_accuracy_professional_law": 0.3941176470588235,
      "mmlu_eval_accuracy_professional_medicine": 0.6451612903225806,
      "mmlu_eval_accuracy_professional_psychology": 0.5942028985507246,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.6296296296296297,
      "mmlu_eval_accuracy_sociology": 0.8181818181818182,
      "mmlu_eval_accuracy_us_foreign_policy": 0.9090909090909091,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.7894736842105263,
      "mmlu_loss": 1.3589041333441323,
      "step": 200
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 0.8347,
      "step": 210
    },
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 0.8393,
      "step": 220
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002,
      "loss": 0.8098,
      "step": 230
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002,
      "loss": 0.7982,
      "step": 240
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.0002,
      "loss": 0.7871,
      "step": 250
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002,
      "loss": 0.8817,
      "step": 260
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002,
      "loss": 0.8185,
      "step": 270
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.0002,
      "loss": 0.8334,
      "step": 280
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002,
      "loss": 0.8398,
      "step": 290
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002,
      "loss": 0.7625,
      "step": 300
    },
    {
      "epoch": 0.1,
      "learning_rate": 0.0002,
      "loss": 0.7923,
      "step": 310
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002,
      "loss": 0.8421,
      "step": 320
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002,
      "loss": 0.8105,
      "step": 330
    },
    {
      "epoch": 0.11,
      "learning_rate": 0.0002,
      "loss": 0.8017,
      "step": 340
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002,
      "loss": 0.7675,
      "step": 350
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002,
      "loss": 0.7919,
      "step": 360
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002,
      "loss": 0.7954,
      "step": 370
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0002,
      "loss": 0.8182,
      "step": 380
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002,
      "loss": 0.8229,
      "step": 390
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002,
      "loss": 0.8335,
      "step": 400
    },
    {
      "epoch": 0.13,
      "eval_loss": 0.7395919561386108,
      "eval_runtime": 132.8292,
      "eval_samples_per_second": 7.528,
      "eval_steps_per_second": 3.764,
      "step": 400
    },
    {
      "epoch": 0.13,
      "mmlu_eval_accuracy": 0.6007241471030027,
      "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
      "mmlu_eval_accuracy_anatomy": 0.5,
      "mmlu_eval_accuracy_astronomy": 0.75,
      "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
      "mmlu_eval_accuracy_clinical_knowledge": 0.5517241379310345,
      "mmlu_eval_accuracy_college_biology": 0.625,
      "mmlu_eval_accuracy_college_chemistry": 0.375,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.5454545454545454,
      "mmlu_eval_accuracy_college_medicine": 0.5909090909090909,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.6363636363636364,
      "mmlu_eval_accuracy_conceptual_physics": 0.5,
      "mmlu_eval_accuracy_econometrics": 0.5,
      "mmlu_eval_accuracy_electrical_engineering": 0.5625,
      "mmlu_eval_accuracy_elementary_mathematics": 0.43902439024390244,
      "mmlu_eval_accuracy_formal_logic": 0.14285714285714285,
      "mmlu_eval_accuracy_global_facts": 0.3,
      "mmlu_eval_accuracy_high_school_biology": 0.53125,
      "mmlu_eval_accuracy_high_school_chemistry": 0.36363636363636365,
      "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778,
      "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.7619047619047619,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.6744186046511628,
      "mmlu_eval_accuracy_high_school_mathematics": 0.3103448275862069,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.6538461538461539,
      "mmlu_eval_accuracy_high_school_physics": 0.29411764705882354,
      "mmlu_eval_accuracy_high_school_psychology": 0.8666666666666667,
      "mmlu_eval_accuracy_high_school_statistics": 0.34782608695652173,
      "mmlu_eval_accuracy_high_school_us_history": 0.7272727272727273,
      "mmlu_eval_accuracy_high_school_world_history": 0.6538461538461539,
      "mmlu_eval_accuracy_human_aging": 0.7391304347826086,
      "mmlu_eval_accuracy_human_sexuality": 0.5,
      "mmlu_eval_accuracy_international_law": 0.9230769230769231,
      "mmlu_eval_accuracy_jurisprudence": 0.6363636363636364,
      "mmlu_eval_accuracy_logical_fallacies": 0.6111111111111112,
      "mmlu_eval_accuracy_machine_learning": 0.5454545454545454,
      "mmlu_eval_accuracy_management": 0.9090909090909091,
      "mmlu_eval_accuracy_marketing": 0.92,
      "mmlu_eval_accuracy_medical_genetics": 0.9090909090909091,
      "mmlu_eval_accuracy_miscellaneous": 0.7558139534883721,
      "mmlu_eval_accuracy_moral_disputes": 0.6578947368421053,
      "mmlu_eval_accuracy_moral_scenarios": 0.31,
      "mmlu_eval_accuracy_nutrition": 0.7575757575757576,
      "mmlu_eval_accuracy_philosophy": 0.6470588235294118,
      "mmlu_eval_accuracy_prehistory": 0.4857142857142857,
      "mmlu_eval_accuracy_professional_accounting": 0.6129032258064516,
      "mmlu_eval_accuracy_professional_law": 0.38823529411764707,
      "mmlu_eval_accuracy_professional_medicine": 0.6774193548387096,
      "mmlu_eval_accuracy_professional_psychology": 0.6376811594202898,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.7037037037037037,
      "mmlu_eval_accuracy_sociology": 0.8636363636363636,
      "mmlu_eval_accuracy_us_foreign_policy": 0.8181818181818182,
      "mmlu_eval_accuracy_virology": 0.6111111111111112,
      "mmlu_eval_accuracy_world_religions": 0.9473684210526315,
      "mmlu_loss": 1.0392796968550035,
      "step": 400
    },
    {
      "epoch": 0.13,
      "learning_rate": 0.0002,
      "loss": 0.8139,
      "step": 410
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002,
      "loss": 0.8146,
      "step": 420
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002,
      "loss": 0.8034,
      "step": 430
    },
    {
      "epoch": 0.14,
      "learning_rate": 0.0002,
      "loss": 0.7941,
      "step": 440
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 0.7994,
      "step": 450
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 0.7466,
      "step": 460
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 0.8536,
      "step": 470
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002,
      "loss": 0.805,
      "step": 480
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002,
      "loss": 0.8393,
      "step": 490
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0002,
      "loss": 0.7814,
      "step": 500
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002,
      "loss": 0.824,
      "step": 510
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002,
      "loss": 0.8338,
      "step": 520
    },
    {
      "epoch": 0.17,
      "learning_rate": 0.0002,
      "loss": 0.8008,
      "step": 530
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002,
      "loss": 0.7993,
      "step": 540
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002,
      "loss": 0.7893,
      "step": 550
    },
    {
      "epoch": 0.18,
      "learning_rate": 0.0002,
      "loss": 0.7627,
      "step": 560
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0002,
      "loss": 0.8679,
      "step": 570
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0002,
      "loss": 0.7836,
      "step": 580
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0002,
      "loss": 0.7854,
      "step": 590
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002,
      "loss": 0.789,
      "step": 600
    },
    {
      "epoch": 0.2,
      "eval_loss": 0.7367475628852844,
      "eval_runtime": 132.8557,
      "eval_samples_per_second": 7.527,
      "eval_steps_per_second": 3.763,
      "step": 600
    },
    {
      "epoch": 0.2,
      "mmlu_eval_accuracy": 0.594293155214947,
      "mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727,
      "mmlu_eval_accuracy_anatomy": 0.5714285714285714,
      "mmlu_eval_accuracy_astronomy": 0.8125,
      "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
      "mmlu_eval_accuracy_clinical_knowledge": 0.5862068965517241,
      "mmlu_eval_accuracy_college_biology": 0.5,
      "mmlu_eval_accuracy_college_chemistry": 0.375,
      "mmlu_eval_accuracy_college_computer_science": 0.36363636363636365,
      "mmlu_eval_accuracy_college_mathematics": 0.6363636363636364,
      "mmlu_eval_accuracy_college_medicine": 0.5909090909090909,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.5454545454545454,
      "mmlu_eval_accuracy_conceptual_physics": 0.5,
      "mmlu_eval_accuracy_econometrics": 0.5,
      "mmlu_eval_accuracy_electrical_engineering": 0.5,
      "mmlu_eval_accuracy_elementary_mathematics": 0.5121951219512195,
      "mmlu_eval_accuracy_formal_logic": 0.14285714285714285,
      "mmlu_eval_accuracy_global_facts": 0.4,
      "mmlu_eval_accuracy_high_school_biology": 0.53125,
      "mmlu_eval_accuracy_high_school_chemistry": 0.45454545454545453,
      "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_european_history": 0.7777777777777778,
      "mmlu_eval_accuracy_high_school_geography": 0.8636363636363636,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.7619047619047619,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.6046511627906976,
      "mmlu_eval_accuracy_high_school_mathematics": 0.2413793103448276,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.6153846153846154,
      "mmlu_eval_accuracy_high_school_physics": 0.17647058823529413,
      "mmlu_eval_accuracy_high_school_psychology": 0.8166666666666667,
      "mmlu_eval_accuracy_high_school_statistics": 0.4782608695652174,
      "mmlu_eval_accuracy_high_school_us_history": 0.6818181818181818,
      "mmlu_eval_accuracy_high_school_world_history": 0.7307692307692307,
      "mmlu_eval_accuracy_human_aging": 0.7391304347826086,
      "mmlu_eval_accuracy_human_sexuality": 0.5,
      "mmlu_eval_accuracy_international_law": 1.0,
      "mmlu_eval_accuracy_jurisprudence": 0.6363636363636364,
      "mmlu_eval_accuracy_logical_fallacies": 0.6111111111111112,
      "mmlu_eval_accuracy_machine_learning": 0.45454545454545453,
      "mmlu_eval_accuracy_management": 0.9090909090909091,
      "mmlu_eval_accuracy_marketing": 0.92,
      "mmlu_eval_accuracy_medical_genetics": 0.8181818181818182,
      "mmlu_eval_accuracy_miscellaneous": 0.7325581395348837,
      "mmlu_eval_accuracy_moral_disputes": 0.631578947368421,
      "mmlu_eval_accuracy_moral_scenarios": 0.39,
      "mmlu_eval_accuracy_nutrition": 0.7575757575757576,
      "mmlu_eval_accuracy_philosophy": 0.6764705882352942,
      "mmlu_eval_accuracy_prehistory": 0.5428571428571428,
      "mmlu_eval_accuracy_professional_accounting": 0.5483870967741935,
      "mmlu_eval_accuracy_professional_law": 0.38823529411764707,
      "mmlu_eval_accuracy_professional_medicine": 0.6129032258064516,
      "mmlu_eval_accuracy_professional_psychology": 0.6231884057971014,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.7037037037037037,
      "mmlu_eval_accuracy_sociology": 0.8636363636363636,
      "mmlu_eval_accuracy_us_foreign_policy": 0.8181818181818182,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.8421052631578947,
      "mmlu_loss": 1.1680357199813927,
      "step": 600
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002,
      "loss": 0.7913,
      "step": 610
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0002,
      "loss": 0.8255,
      "step": 620
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0002,
      "loss": 0.8075,
      "step": 630
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0002,
      "loss": 0.7866,
      "step": 640
    },
    {
      "epoch": 0.21,
      "learning_rate": 0.0002,
      "loss": 0.7553,
      "step": 650
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 0.8378,
      "step": 660
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 0.8303,
      "step": 670
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 0.8294,
      "step": 680
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002,
      "loss": 0.7974,
      "step": 690
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002,
      "loss": 0.837,
      "step": 700
    },
    {
      "epoch": 0.23,
      "learning_rate": 0.0002,
      "loss": 0.8102,
      "step": 710
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002,
      "loss": 0.7953,
      "step": 720
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002,
      "loss": 0.841,
      "step": 730
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002,
      "loss": 0.8219,
      "step": 740
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002,
      "loss": 0.7431,
      "step": 750
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002,
      "loss": 0.7249,
      "step": 760
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.0002,
      "loss": 0.8422,
      "step": 770
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002,
      "loss": 0.8637,
      "step": 780
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002,
      "loss": 0.7363,
      "step": 790
    },
    {
      "epoch": 0.26,
      "learning_rate": 0.0002,
      "loss": 0.7515,
      "step": 800
    },
    {
      "epoch": 0.26,
      "eval_loss": 0.7350410223007202,
      "eval_runtime": 132.8114,
      "eval_samples_per_second": 7.529,
      "eval_steps_per_second": 3.765,
      "step": 800
    },
    {
      "epoch": 0.26,
      "mmlu_eval_accuracy": 0.5862496128506853,
      "mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727,
      "mmlu_eval_accuracy_anatomy": 0.5714285714285714,
      "mmlu_eval_accuracy_astronomy": 0.75,
      "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
      "mmlu_eval_accuracy_clinical_knowledge": 0.5517241379310345,
      "mmlu_eval_accuracy_college_biology": 0.5,
      "mmlu_eval_accuracy_college_chemistry": 0.25,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.6363636363636364,
      "mmlu_eval_accuracy_college_medicine": 0.5909090909090909,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.5454545454545454,
      "mmlu_eval_accuracy_conceptual_physics": 0.5,
      "mmlu_eval_accuracy_econometrics": 0.4166666666666667,
      "mmlu_eval_accuracy_electrical_engineering": 0.4375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.43902439024390244,
      "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
      "mmlu_eval_accuracy_global_facts": 0.3,
      "mmlu_eval_accuracy_high_school_biology": 0.5625,
      "mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091,
      "mmlu_eval_accuracy_high_school_computer_science": 0.6666666666666666,
      "mmlu_eval_accuracy_high_school_european_history": 0.8333333333333334,
      "mmlu_eval_accuracy_high_school_geography": 0.8181818181818182,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.7619047619047619,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.6511627906976745,
      "mmlu_eval_accuracy_high_school_mathematics": 0.20689655172413793,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.6538461538461539,
      "mmlu_eval_accuracy_high_school_physics": 0.29411764705882354,
      "mmlu_eval_accuracy_high_school_psychology": 0.8333333333333334,
      "mmlu_eval_accuracy_high_school_statistics": 0.43478260869565216,
      "mmlu_eval_accuracy_high_school_us_history": 0.7272727272727273,
      "mmlu_eval_accuracy_high_school_world_history": 0.6538461538461539,
      "mmlu_eval_accuracy_human_aging": 0.7391304347826086,
      "mmlu_eval_accuracy_human_sexuality": 0.5,
      "mmlu_eval_accuracy_international_law": 0.9230769230769231,
      "mmlu_eval_accuracy_jurisprudence": 0.45454545454545453,
      "mmlu_eval_accuracy_logical_fallacies": 0.6111111111111112,
      "mmlu_eval_accuracy_machine_learning": 0.45454545454545453,
      "mmlu_eval_accuracy_management": 0.9090909090909091,
      "mmlu_eval_accuracy_marketing": 0.92,
      "mmlu_eval_accuracy_medical_genetics": 0.8181818181818182,
      "mmlu_eval_accuracy_miscellaneous": 0.7209302325581395,
      "mmlu_eval_accuracy_moral_disputes": 0.6052631578947368,
      "mmlu_eval_accuracy_moral_scenarios": 0.4,
      "mmlu_eval_accuracy_nutrition": 0.7878787878787878,
      "mmlu_eval_accuracy_philosophy": 0.6176470588235294,
      "mmlu_eval_accuracy_prehistory": 0.5142857142857142,
      "mmlu_eval_accuracy_professional_accounting": 0.5161290322580645,
      "mmlu_eval_accuracy_professional_law": 0.37058823529411766,
      "mmlu_eval_accuracy_professional_medicine": 0.6129032258064516,
      "mmlu_eval_accuracy_professional_psychology": 0.6376811594202898,
      "mmlu_eval_accuracy_public_relations": 0.5,
      "mmlu_eval_accuracy_security_studies": 0.7407407407407407,
      "mmlu_eval_accuracy_sociology": 0.9090909090909091,
      "mmlu_eval_accuracy_us_foreign_policy": 0.8181818181818182,
      "mmlu_eval_accuracy_virology": 0.5555555555555556,
      "mmlu_eval_accuracy_world_religions": 0.8421052631578947,
      "mmlu_loss": 1.2610168881889423,
      "step": 800
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002,
      "loss": 0.7708,
      "step": 810
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002,
      "loss": 0.7835,
      "step": 820
    },
    {
      "epoch": 0.27,
      "learning_rate": 0.0002,
      "loss": 0.7705,
      "step": 830
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002,
      "loss": 0.8067,
      "step": 840
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002,
      "loss": 0.789,
      "step": 850
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.0002,
      "loss": 0.7876,
      "step": 860
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 0.8059,
      "step": 870
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 0.8219,
      "step": 880
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 0.7654,
      "step": 890
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002,
      "loss": 0.8648,
      "step": 900
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002,
      "loss": 0.7738,
      "step": 910
    },
    {
      "epoch": 0.3,
      "learning_rate": 0.0002,
      "loss": 0.7952,
      "step": 920
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002,
      "loss": 0.8421,
      "step": 930
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002,
      "loss": 0.7871,
      "step": 940
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.0002,
      "loss": 0.7859,
      "step": 950
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002,
      "loss": 0.8222,
      "step": 960
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002,
      "loss": 0.778,
      "step": 970
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0002,
      "loss": 0.8145,
      "step": 980
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002,
      "loss": 0.7729,
      "step": 990
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0002,
      "loss": 0.7829,
      "step": 1000
    },
    {
      "epoch": 0.33,
      "eval_loss": 0.7300029397010803,
      "eval_runtime": 132.8403,
      "eval_samples_per_second": 7.528,
      "eval_steps_per_second": 3.764,
      "step": 1000
    },
    {
      "epoch": 0.33,
      "mmlu_eval_accuracy": 0.5863749078875924,
      "mmlu_eval_accuracy_abstract_algebra": 0.2727272727272727,
      "mmlu_eval_accuracy_anatomy": 0.5714285714285714,
      "mmlu_eval_accuracy_astronomy": 0.75,
      "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
      "mmlu_eval_accuracy_clinical_knowledge": 0.5862068965517241,
      "mmlu_eval_accuracy_college_biology": 0.5,
      "mmlu_eval_accuracy_college_chemistry": 0.375,
      "mmlu_eval_accuracy_college_computer_science": 0.45454545454545453,
      "mmlu_eval_accuracy_college_mathematics": 0.5454545454545454,
      "mmlu_eval_accuracy_college_medicine": 0.6363636363636364,
      "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
      "mmlu_eval_accuracy_computer_security": 0.7272727272727273,
      "mmlu_eval_accuracy_conceptual_physics": 0.5,
      "mmlu_eval_accuracy_econometrics": 0.5,
      "mmlu_eval_accuracy_electrical_engineering": 0.375,
      "mmlu_eval_accuracy_elementary_mathematics": 0.5121951219512195,
      "mmlu_eval_accuracy_formal_logic": 0.14285714285714285,
      "mmlu_eval_accuracy_global_facts": 0.5,
      "mmlu_eval_accuracy_high_school_biology": 0.5625,
      "mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091,
      "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
      "mmlu_eval_accuracy_high_school_european_history": 0.7222222222222222,
      "mmlu_eval_accuracy_high_school_geography": 0.8636363636363636,
      "mmlu_eval_accuracy_high_school_government_and_politics": 0.7142857142857143,
      "mmlu_eval_accuracy_high_school_macroeconomics": 0.6511627906976745,
      "mmlu_eval_accuracy_high_school_mathematics": 0.2413793103448276,
      "mmlu_eval_accuracy_high_school_microeconomics": 0.6538461538461539,
      "mmlu_eval_accuracy_high_school_physics": 0.17647058823529413,
      "mmlu_eval_accuracy_high_school_psychology": 0.85,
      "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
      "mmlu_eval_accuracy_high_school_us_history": 0.7727272727272727,
      "mmlu_eval_accuracy_high_school_world_history": 0.6923076923076923,
      "mmlu_eval_accuracy_human_aging": 0.782608695652174,
      "mmlu_eval_accuracy_human_sexuality": 0.5,
      "mmlu_eval_accuracy_international_law": 1.0,
      "mmlu_eval_accuracy_jurisprudence": 0.45454545454545453,
      "mmlu_eval_accuracy_logical_fallacies": 0.6111111111111112,
      "mmlu_eval_accuracy_machine_learning": 0.2727272727272727,
      "mmlu_eval_accuracy_management": 0.9090909090909091,
      "mmlu_eval_accuracy_marketing": 0.92,
      "mmlu_eval_accuracy_medical_genetics": 0.9090909090909091,
      "mmlu_eval_accuracy_miscellaneous": 0.7209302325581395,
      "mmlu_eval_accuracy_moral_disputes": 0.5789473684210527,
      "mmlu_eval_accuracy_moral_scenarios": 0.35,
      "mmlu_eval_accuracy_nutrition": 0.7272727272727273,
      "mmlu_eval_accuracy_philosophy": 0.6764705882352942,
      "mmlu_eval_accuracy_prehistory": 0.5714285714285714,
      "mmlu_eval_accuracy_professional_accounting": 0.5161290322580645,
      "mmlu_eval_accuracy_professional_law": 0.38235294117647056,
      "mmlu_eval_accuracy_professional_medicine": 0.5483870967741935,
      "mmlu_eval_accuracy_professional_psychology": 0.5797101449275363,
      "mmlu_eval_accuracy_public_relations": 0.5833333333333334,
      "mmlu_eval_accuracy_security_studies": 0.5925925925925926,
      "mmlu_eval_accuracy_sociology": 0.8181818181818182,
      "mmlu_eval_accuracy_us_foreign_policy": 0.8181818181818182,
      "mmlu_eval_accuracy_virology": 0.5,
      "mmlu_eval_accuracy_world_religions": 0.8947368421052632,
      "mmlu_loss": 1.334560420821292,
      "step": 1000
    }
  ],
  "logging_steps": 10,
  "max_steps": 9129,
  "num_train_epochs": 3,
  "save_steps": 200,
  "total_flos": 5.3490750465520435e+17,
  "trial_name": null,
  "trial_params": null
}