diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,18021 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.032951341851865414, + "eval_steps": 500, + "global_step": 1800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "acc_char": 0.2961, + "acc_token": 0.6732, + "epoch": 0.0, + "loss_char": 0.7519, + "loss_token": 1.6949, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 1 + }, + { + "acc_char": 0.3069, + "acc_token": 0.6884, + "epoch": 0.0, + "loss_char": 0.7203, + "loss_token": 1.6238, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 2 + }, + { + "acc_char": 0.2884, + "acc_token": 0.6605, + "epoch": 0.0, + "loss_char": 0.7484, + "loss_token": 1.6386, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 3 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6459, + "epoch": 0.0, + "loss_char": 0.8252, + "loss_token": 1.7039, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 4 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6684, + "epoch": 0.0, + "loss_char": 0.7786, + "loss_token": 1.6981, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 5 + }, + { + "acc_char": 0.3093, + "acc_token": 0.7014, + "epoch": 0.0, + "loss_char": 0.6768, + "loss_token": 1.5207, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 6 + }, + { + "acc_char": 0.3094, + "acc_token": 0.689, + "epoch": 0.0, + "loss_char": 0.6929, + "loss_token": 1.5691, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 7 + }, + { + "acc_char": 0.2961, + "acc_token": 0.6754, + "epoch": 0.0, + "loss_char": 0.7308, + "loss_token": 1.6342, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 8 + }, + { + "acc_char": 0.2917, + "acc_token": 0.664, + "epoch": 0.0, + "loss_char": 0.7558, + "loss_token": 1.6594, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 9 + }, + { + "acc_char": 0.2949, + "acc_token": 0.6353, + "epoch": 0.0, + "loss_char": 0.8635, + "loss_token": 1.7367, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 10 + }, + { + "acc_char": 0.2912, + "acc_token": 0.6564, + "epoch": 0.0, + "loss_char": 0.7989, + "loss_token": 1.6703, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 11 + }, + { + "acc_char": 0.2945, + "acc_token": 0.6563, + "epoch": 0.0, + "loss_char": 0.815, + "loss_token": 1.7397, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 12 + }, + { + "acc_char": 0.3069, + "acc_token": 0.7084, + "epoch": 0.0, + "loss_char": 0.644, + "loss_token": 1.5543, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 13 + }, + { + "acc_char": 0.3055, + "acc_token": 0.677, + "epoch": 0.0, + "loss_char": 0.7408, + "loss_token": 1.619, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 14 + }, + { + "acc_char": 0.3079, + "acc_token": 0.6895, + "epoch": 0.0, + "loss_char": 0.7422, + "loss_token": 1.6489, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 15 + }, + { + "acc_char": 0.2986, + "acc_token": 0.6883, + "epoch": 0.0, + "loss_char": 0.6958, + "loss_token": 1.5508, + "lr": "0.00e+00", + "norm": "0.00e+00", + "step": 16 + }, + { + "acc_char": 0.2952, + "acc_token": 0.6751, + "epoch": 0.0, + "loss_char": 0.7564, + "loss_token": 1.7334, + "lr": "1.00e-07", + "norm": 5.9607, + "step": 17 + }, + { + "acc_char": 0.2883, + "acc_token": 0.6696, + "epoch": 0.0, + "loss_char": 0.7107, + "loss_token": 1.7119, + "lr": "1.00e-07", + "norm": 5.9607, + "step": 18 + }, + { + "acc_char": 0.2939, + "acc_token": 0.667, + "epoch": 0.0, + "loss_char": 0.7545, + "loss_token": 1.7812, + "lr": "2.00e-07", + "norm": 6.3926, + "step": 19 + }, + { + "acc_char": 0.3025, + "acc_token": 0.689, + "epoch": 0.0, + "loss_char": 0.7043, + "loss_token": 1.6273, + "lr": "2.00e-07", + "norm": 5.7886, + "step": 20 + }, + { + "acc_char": 0.29, + "acc_token": 0.6184, + "epoch": 0.0, + "loss_char": 0.9418, + "loss_token": 1.7781, + "lr": "2.00e-07", + "norm": 5.9238, + "step": 21 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6826, + "epoch": 0.0, + "loss_char": 0.7337, + "loss_token": 1.6468, + "lr": "2.00e-07", + "norm": 6.2689, + "step": 22 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6894, + "epoch": 0.0, + "loss_char": 0.703, + "loss_token": 1.6869, + "lr": "2.00e-07", + "norm": 5.9656, + "step": 23 + }, + { + "acc_char": 0.303, + "acc_token": 0.6772, + "epoch": 0.0, + "loss_char": 0.7587, + "loss_token": 1.6943, + "lr": "2.00e-07", + "norm": 5.8126, + "step": 24 + }, + { + "acc_char": 0.2892, + "acc_token": 0.6357, + "epoch": 0.0, + "loss_char": 0.826, + "loss_token": 1.761, + "lr": "2.00e-07", + "norm": 5.7309, + "step": 25 + }, + { + "acc_char": 0.2755, + "acc_token": 0.6058, + "epoch": 0.0, + "loss_char": 0.932, + "loss_token": 1.8898, + "lr": "2.00e-07", + "norm": 5.7804, + "step": 26 + }, + { + "acc_char": 0.2905, + "acc_token": 0.6638, + "epoch": 0.0, + "loss_char": 0.7657, + "loss_token": 1.6386, + "lr": "2.00e-07", + "norm": 5.6621, + "step": 27 + }, + { + "acc_char": 0.2955, + "acc_token": 0.6773, + "epoch": 0.0, + "loss_char": 0.72, + "loss_token": 1.7086, + "lr": "2.00e-07", + "norm": 5.9964, + "step": 28 + }, + { + "acc_char": 0.2825, + "acc_token": 0.6586, + "epoch": 0.0, + "loss_char": 0.7422, + "loss_token": 1.7517, + "lr": "2.00e-07", + "norm": 5.8231, + "step": 29 + }, + { + "acc_char": 0.2974, + "acc_token": 0.6798, + "epoch": 0.0, + "loss_char": 0.7219, + "loss_token": 1.7468, + "lr": "2.00e-07", + "norm": 5.9514, + "step": 30 + }, + { + "acc_char": 0.2956, + "acc_token": 0.6434, + "epoch": 0.0, + "loss_char": 0.8223, + "loss_token": 1.6483, + "lr": "2.00e-07", + "norm": 5.6608, + "step": 31 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6829, + "epoch": 0.0, + "loss_char": 0.7012, + "loss_token": 1.5172, + "lr": "2.00e-07", + "norm": 5.652, + "step": 32 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6764, + "epoch": 0.0, + "loss_char": 0.7588, + "loss_token": 1.7038, + "lr": "2.00e-07", + "norm": 5.8963, + "step": 33 + }, + { + "acc_char": 0.2916, + "acc_token": 0.6639, + "epoch": 0.0, + "loss_char": 0.7669, + "loss_token": 1.7697, + "lr": "2.00e-07", + "norm": 6.2551, + "step": 34 + }, + { + "acc_char": 0.2868, + "acc_token": 0.6746, + "epoch": 0.0, + "loss_char": 0.7113, + "loss_token": 1.7143, + "lr": "2.00e-07", + "norm": 5.8972, + "step": 35 + }, + { + "acc_char": 0.2844, + "acc_token": 0.6591, + "epoch": 0.0, + "loss_char": 0.7909, + "loss_token": 1.8081, + "lr": "2.00e-07", + "norm": 6.105, + "step": 36 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6838, + "epoch": 0.0, + "loss_char": 0.7246, + "loss_token": 1.6531, + "lr": "2.00e-07", + "norm": 5.7111, + "step": 37 + }, + { + "acc_char": 0.2848, + "acc_token": 0.6208, + "epoch": 0.0, + "loss_char": 0.8921, + "loss_token": 1.8735, + "lr": "2.00e-07", + "norm": 5.7803, + "step": 38 + }, + { + "acc_char": 0.2753, + "acc_token": 0.5703, + "epoch": 0.0, + "loss_char": 1.0188, + "loss_token": 1.9257, + "lr": "2.00e-07", + "norm": 5.8799, + "step": 39 + }, + { + "acc_char": 0.3044, + "acc_token": 0.6826, + "epoch": 0.0, + "loss_char": 0.7192, + "loss_token": 1.6524, + "lr": "2.00e-07", + "norm": 5.8362, + "step": 40 + }, + { + "acc_char": 0.313, + "acc_token": 0.6983, + "epoch": 0.0, + "loss_char": 0.6946, + "loss_token": 1.6673, + "lr": "2.00e-07", + "norm": 5.733, + "step": 41 + }, + { + "acc_char": 0.3086, + "acc_token": 0.7071, + "epoch": 0.0, + "loss_char": 0.668, + "loss_token": 1.5743, + "lr": "2.00e-07", + "norm": 5.5149, + "step": 42 + }, + { + "acc_char": 0.2965, + "acc_token": 0.6688, + "epoch": 0.0, + "loss_char": 0.7779, + "loss_token": 1.6748, + "lr": "2.00e-07", + "norm": 5.7856, + "step": 43 + }, + { + "acc_char": 0.298, + "acc_token": 0.6751, + "epoch": 0.0, + "loss_char": 0.7445, + "loss_token": 1.7059, + "lr": "2.00e-07", + "norm": 5.9937, + "step": 44 + }, + { + "acc_char": 0.2831, + "acc_token": 0.6538, + "epoch": 0.0, + "loss_char": 0.7752, + "loss_token": 1.6552, + "lr": "2.00e-07", + "norm": 5.5977, + "step": 45 + }, + { + "acc_char": 0.2992, + "acc_token": 0.677, + "epoch": 0.0, + "loss_char": 0.7412, + "loss_token": 1.6394, + "lr": "2.00e-07", + "norm": 5.6584, + "step": 46 + }, + { + "acc_char": 0.2969, + "acc_token": 0.6693, + "epoch": 0.0, + "loss_char": 0.7711, + "loss_token": 1.6313, + "lr": "2.00e-07", + "norm": 5.8419, + "step": 47 + }, + { + "acc_char": 0.2975, + "acc_token": 0.6811, + "epoch": 0.0, + "loss_char": 0.7134, + "loss_token": 1.7313, + "lr": "2.00e-07", + "norm": 5.9012, + "step": 48 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6828, + "epoch": 0.0, + "loss_char": 0.7124, + "loss_token": 1.6284, + "lr": "2.00e-07", + "norm": 5.6233, + "step": 49 + }, + { + "acc_char": 0.2972, + "acc_token": 0.6354, + "epoch": 0.0, + "loss_char": 0.8823, + "loss_token": 1.7413, + "lr": "2.00e-07", + "norm": 5.3973, + "step": 50 + }, + { + "acc_char": 0.3022, + "acc_token": 0.6905, + "epoch": 0.0, + "loss_char": 0.7153, + "loss_token": 1.5482, + "lr": "2.00e-07", + "norm": 5.6895, + "step": 51 + }, + { + "acc_char": 0.2982, + "acc_token": 0.6686, + "epoch": 0.0, + "loss_char": 0.7733, + "loss_token": 1.6329, + "lr": "2.00e-07", + "norm": 6.9704, + "step": 52 + }, + { + "acc_char": 0.2877, + "acc_token": 0.6616, + "epoch": 0.0, + "loss_char": 0.7528, + "loss_token": 1.6389, + "lr": "2.00e-07", + "norm": 6.0386, + "step": 53 + }, + { + "acc_char": 0.298, + "acc_token": 0.6711, + "epoch": 0.0, + "loss_char": 0.7559, + "loss_token": 1.7198, + "lr": "2.00e-07", + "norm": 5.7229, + "step": 54 + }, + { + "acc_char": 0.2947, + "acc_token": 0.6745, + "epoch": 0.0, + "loss_char": 0.7399, + "loss_token": 1.7187, + "lr": "2.00e-07", + "norm": 5.7068, + "step": 55 + }, + { + "acc_char": 0.3029, + "acc_token": 0.6835, + "epoch": 0.0, + "loss_char": 0.7238, + "loss_token": 1.5982, + "lr": "2.00e-07", + "norm": 5.517, + "step": 56 + }, + { + "acc_char": 0.3065, + "acc_token": 0.6897, + "epoch": 0.0, + "loss_char": 0.7217, + "loss_token": 1.6287, + "lr": "2.00e-07", + "norm": 5.6666, + "step": 57 + }, + { + "acc_char": 0.3045, + "acc_token": 0.6686, + "epoch": 0.0, + "loss_char": 0.7933, + "loss_token": 1.6452, + "lr": "2.00e-07", + "norm": 5.8336, + "step": 58 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6734, + "epoch": 0.0, + "loss_char": 0.75, + "loss_token": 1.6541, + "lr": "2.00e-07", + "norm": 5.731, + "step": 59 + }, + { + "acc_char": 0.2929, + "acc_token": 0.6754, + "epoch": 0.0, + "loss_char": 0.742, + "loss_token": 1.7271, + "lr": "2.00e-07", + "norm": 6.1144, + "step": 60 + }, + { + "acc_char": 0.2977, + "acc_token": 0.6707, + "epoch": 0.0, + "loss_char": 0.7895, + "loss_token": 1.7199, + "lr": "2.00e-07", + "norm": 5.7852, + "step": 61 + }, + { + "acc_char": 0.299, + "acc_token": 0.6724, + "epoch": 0.0, + "loss_char": 0.7415, + "loss_token": 1.6173, + "lr": "2.00e-07", + "norm": 5.7001, + "step": 62 + }, + { + "acc_char": 0.3119, + "acc_token": 0.7042, + "epoch": 0.0, + "loss_char": 0.6822, + "loss_token": 1.6126, + "lr": "2.00e-07", + "norm": 5.7258, + "step": 63 + }, + { + "acc_char": 0.3012, + "acc_token": 0.6924, + "epoch": 0.0, + "loss_char": 0.6868, + "loss_token": 1.6205, + "lr": "2.00e-07", + "norm": 5.8205, + "step": 64 + }, + { + "acc_char": 0.3083, + "acc_token": 0.6987, + "epoch": 0.0, + "loss_char": 0.6842, + "loss_token": 1.6814, + "lr": "2.00e-07", + "norm": 5.8432, + "step": 65 + }, + { + "acc_char": 0.31, + "acc_token": 0.7027, + "epoch": 0.0, + "loss_char": 0.673, + "loss_token": 1.6156, + "lr": "2.00e-07", + "norm": 5.914, + "step": 66 + }, + { + "acc_char": 0.3223, + "acc_token": 0.7415, + "epoch": 0.0, + "loss_char": 0.5721, + "loss_token": 1.4831, + "lr": "2.00e-07", + "norm": 5.512, + "step": 67 + }, + { + "acc_char": 0.2937, + "acc_token": 0.6344, + "epoch": 0.0, + "loss_char": 0.8655, + "loss_token": 1.815, + "lr": "2.00e-07", + "norm": 5.8184, + "step": 68 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6775, + "epoch": 0.0, + "loss_char": 0.7422, + "loss_token": 1.682, + "lr": "2.00e-07", + "norm": 5.5313, + "step": 69 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6931, + "epoch": 0.0, + "loss_char": 0.6702, + "loss_token": 1.5349, + "lr": "2.00e-07", + "norm": 5.5487, + "step": 70 + }, + { + "acc_char": 0.2931, + "acc_token": 0.6635, + "epoch": 0.0, + "loss_char": 0.7725, + "loss_token": 1.6859, + "lr": "2.00e-07", + "norm": 5.7526, + "step": 71 + }, + { + "acc_char": 0.3132, + "acc_token": 0.7059, + "epoch": 0.0, + "loss_char": 0.6693, + "loss_token": 1.5245, + "lr": "2.00e-07", + "norm": 5.511, + "step": 72 + }, + { + "acc_char": 0.2954, + "acc_token": 0.6704, + "epoch": 0.0, + "loss_char": 0.7482, + "loss_token": 1.6966, + "lr": "2.00e-07", + "norm": 5.7717, + "step": 73 + }, + { + "acc_char": 0.2911, + "acc_token": 0.6618, + "epoch": 0.0, + "loss_char": 0.7721, + "loss_token": 1.7499, + "lr": "2.00e-07", + "norm": 5.9435, + "step": 74 + }, + { + "acc_char": 0.3034, + "acc_token": 0.6888, + "epoch": 0.0, + "loss_char": 0.698, + "loss_token": 1.5809, + "lr": "2.00e-07", + "norm": 5.2705, + "step": 75 + }, + { + "acc_char": 0.3086, + "acc_token": 0.6984, + "epoch": 0.0, + "loss_char": 0.6715, + "loss_token": 1.6068, + "lr": "2.00e-07", + "norm": 5.429, + "step": 76 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6732, + "epoch": 0.0, + "loss_char": 0.7512, + "loss_token": 1.649, + "lr": "2.00e-07", + "norm": 5.7104, + "step": 77 + }, + { + "acc_char": 0.2966, + "acc_token": 0.6677, + "epoch": 0.0, + "loss_char": 0.7483, + "loss_token": 1.6908, + "lr": "2.00e-07", + "norm": 5.844, + "step": 78 + }, + { + "acc_char": 0.3089, + "acc_token": 0.6839, + "epoch": 0.0, + "loss_char": 0.7344, + "loss_token": 1.6171, + "lr": "2.00e-07", + "norm": 5.5551, + "step": 79 + }, + { + "acc_char": 0.3002, + "acc_token": 0.6858, + "epoch": 0.0, + "loss_char": 0.7015, + "loss_token": 1.5875, + "lr": "2.00e-07", + "norm": 5.7256, + "step": 80 + }, + { + "acc_char": 0.306, + "acc_token": 0.6897, + "epoch": 0.0, + "loss_char": 0.7126, + "loss_token": 1.6529, + "lr": "2.00e-07", + "norm": 5.757, + "step": 81 + }, + { + "acc_char": 0.29, + "acc_token": 0.6649, + "epoch": 0.0, + "loss_char": 0.7649, + "loss_token": 1.6658, + "lr": "2.00e-07", + "norm": 5.6632, + "step": 82 + }, + { + "acc_char": 0.3139, + "acc_token": 0.692, + "epoch": 0.0, + "loss_char": 0.7281, + "loss_token": 1.563, + "lr": "2.00e-07", + "norm": 5.7469, + "step": 83 + }, + { + "acc_char": 0.3093, + "acc_token": 0.6999, + "epoch": 0.0, + "loss_char": 0.6736, + "loss_token": 1.5781, + "lr": "2.00e-07", + "norm": 6.2755, + "step": 84 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6809, + "epoch": 0.0, + "loss_char": 0.7506, + "loss_token": 1.7318, + "lr": "2.00e-07", + "norm": 5.5039, + "step": 85 + }, + { + "acc_char": 0.2907, + "acc_token": 0.6696, + "epoch": 0.0, + "loss_char": 0.7501, + "loss_token": 1.7583, + "lr": "2.00e-07", + "norm": 5.8564, + "step": 86 + }, + { + "acc_char": 0.297, + "acc_token": 0.6717, + "epoch": 0.0, + "loss_char": 0.7485, + "loss_token": 1.7149, + "lr": "2.00e-07", + "norm": 5.8721, + "step": 87 + }, + { + "acc_char": 0.3062, + "acc_token": 0.685, + "epoch": 0.0, + "loss_char": 0.7216, + "loss_token": 1.6338, + "lr": "2.00e-07", + "norm": 5.7368, + "step": 88 + }, + { + "acc_char": 0.3074, + "acc_token": 0.6946, + "epoch": 0.0, + "loss_char": 0.698, + "loss_token": 1.6557, + "lr": "2.00e-07", + "norm": 5.6536, + "step": 89 + }, + { + "acc_char": 0.3309, + "acc_token": 0.7018, + "epoch": 0.0, + "loss_char": 0.6798, + "loss_token": 1.5062, + "lr": "2.00e-07", + "norm": 5.6085, + "step": 90 + }, + { + "acc_char": 0.3205, + "acc_token": 0.7148, + "epoch": 0.0, + "loss_char": 0.6533, + "loss_token": 1.5137, + "lr": "2.00e-07", + "norm": 5.9149, + "step": 91 + }, + { + "acc_char": 0.2983, + "acc_token": 0.6762, + "epoch": 0.0, + "loss_char": 0.7395, + "loss_token": 1.588, + "lr": "2.00e-07", + "norm": 5.5893, + "step": 92 + }, + { + "acc_char": 0.2925, + "acc_token": 0.6386, + "epoch": 0.0, + "loss_char": 0.8278, + "loss_token": 1.7404, + "lr": "2.00e-07", + "norm": 5.391, + "step": 93 + }, + { + "acc_char": 0.2976, + "acc_token": 0.6813, + "epoch": 0.0, + "loss_char": 0.7177, + "loss_token": 1.6489, + "lr": "2.00e-07", + "norm": 5.5271, + "step": 94 + }, + { + "acc_char": 0.3023, + "acc_token": 0.6715, + "epoch": 0.0, + "loss_char": 0.7614, + "loss_token": 1.6439, + "lr": "2.00e-07", + "norm": 5.7352, + "step": 95 + }, + { + "acc_char": 0.3102, + "acc_token": 0.7062, + "epoch": 0.0, + "loss_char": 0.6556, + "loss_token": 1.5339, + "lr": "2.00e-07", + "norm": 5.5893, + "step": 96 + }, + { + "acc_char": 0.3, + "acc_token": 0.6866, + "epoch": 0.0, + "loss_char": 0.7047, + "loss_token": 1.669, + "lr": "2.00e-07", + "norm": 5.4678, + "step": 97 + }, + { + "acc_char": 0.2867, + "acc_token": 0.6561, + "epoch": 0.0, + "loss_char": 0.7706, + "loss_token": 1.742, + "lr": "2.00e-07", + "norm": 6.2133, + "step": 98 + }, + { + "acc_char": 0.2934, + "acc_token": 0.6788, + "epoch": 0.0, + "loss_char": 0.7435, + "loss_token": 1.5761, + "lr": "2.00e-07", + "norm": 5.9936, + "step": 99 + }, + { + "acc_char": 0.3152, + "acc_token": 0.7196, + "epoch": 0.0, + "loss_char": 0.6295, + "loss_token": 1.4385, + "lr": "2.00e-07", + "norm": 5.3239, + "step": 100 + }, + { + "acc_char": 0.2895, + "acc_token": 0.6691, + "epoch": 0.0, + "loss_char": 0.7532, + "loss_token": 1.667, + "lr": "2.00e-07", + "norm": 5.7719, + "step": 101 + }, + { + "acc_char": 0.2954, + "acc_token": 0.6687, + "epoch": 0.0, + "loss_char": 0.7625, + "loss_token": 1.6473, + "lr": "2.00e-07", + "norm": 5.8406, + "step": 102 + }, + { + "acc_char": 0.2919, + "acc_token": 0.6414, + "epoch": 0.0, + "loss_char": 0.8862, + "loss_token": 1.8522, + "lr": "2.00e-07", + "norm": 5.8152, + "step": 103 + }, + { + "acc_char": 0.2972, + "acc_token": 0.688, + "epoch": 0.0, + "loss_char": 0.7107, + "loss_token": 1.6225, + "lr": "2.00e-07", + "norm": 5.7036, + "step": 104 + }, + { + "acc_char": 0.3106, + "acc_token": 0.7055, + "epoch": 0.0, + "loss_char": 0.6515, + "loss_token": 1.5368, + "lr": "2.00e-07", + "norm": 5.5132, + "step": 105 + }, + { + "acc_char": 0.3163, + "acc_token": 0.7085, + "epoch": 0.0, + "loss_char": 0.6619, + "loss_token": 1.5181, + "lr": "2.00e-07", + "norm": 5.7377, + "step": 106 + }, + { + "acc_char": 0.296, + "acc_token": 0.6795, + "epoch": 0.0, + "loss_char": 0.724, + "loss_token": 1.6804, + "lr": "2.00e-07", + "norm": 5.6408, + "step": 107 + }, + { + "acc_char": 0.3114, + "acc_token": 0.6911, + "epoch": 0.0, + "loss_char": 0.7201, + "loss_token": 1.4931, + "lr": "2.00e-07", + "norm": 5.6982, + "step": 108 + }, + { + "acc_char": 0.3197, + "acc_token": 0.7152, + "epoch": 0.0, + "loss_char": 0.6508, + "loss_token": 1.5723, + "lr": "2.00e-07", + "norm": 5.9246, + "step": 109 + }, + { + "acc_char": 0.3119, + "acc_token": 0.6919, + "epoch": 0.0, + "loss_char": 0.7435, + "loss_token": 1.6627, + "lr": "2.00e-07", + "norm": 6.0751, + "step": 110 + }, + { + "acc_char": 0.2963, + "acc_token": 0.648, + "epoch": 0.0, + "loss_char": 0.8038, + "loss_token": 1.7077, + "lr": "2.00e-07", + "norm": 5.5694, + "step": 111 + }, + { + "acc_char": 0.2931, + "acc_token": 0.6728, + "epoch": 0.0, + "loss_char": 0.7443, + "loss_token": 1.6811, + "lr": "2.00e-07", + "norm": 6.3389, + "step": 112 + }, + { + "acc_char": 0.2902, + "acc_token": 0.6706, + "epoch": 0.0, + "loss_char": 0.7368, + "loss_token": 1.5372, + "lr": "2.00e-07", + "norm": 5.4269, + "step": 113 + }, + { + "acc_char": 0.2961, + "acc_token": 0.6749, + "epoch": 0.0, + "loss_char": 0.762, + "loss_token": 1.76, + "lr": "2.00e-07", + "norm": 5.9477, + "step": 114 + }, + { + "acc_char": 0.3006, + "acc_token": 0.6797, + "epoch": 0.0, + "loss_char": 0.7402, + "loss_token": 1.6541, + "lr": "2.00e-07", + "norm": 5.6794, + "step": 115 + }, + { + "acc_char": 0.3053, + "acc_token": 0.6901, + "epoch": 0.0, + "loss_char": 0.6922, + "loss_token": 1.5729, + "lr": "2.00e-07", + "norm": 5.4315, + "step": 116 + }, + { + "acc_char": 0.3027, + "acc_token": 0.676, + "epoch": 0.0, + "loss_char": 0.7406, + "loss_token": 1.7055, + "lr": "2.00e-07", + "norm": 6.0586, + "step": 117 + }, + { + "acc_char": 0.3071, + "acc_token": 0.6983, + "epoch": 0.0, + "loss_char": 0.6718, + "loss_token": 1.6349, + "lr": "2.00e-07", + "norm": 5.6116, + "step": 118 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6823, + "epoch": 0.0, + "loss_char": 0.7209, + "loss_token": 1.7037, + "lr": "2.00e-07", + "norm": 5.8724, + "step": 119 + }, + { + "acc_char": 0.3055, + "acc_token": 0.6851, + "epoch": 0.0, + "loss_char": 0.7164, + "loss_token": 1.5718, + "lr": "2.00e-07", + "norm": 6.0997, + "step": 120 + }, + { + "acc_char": 0.2934, + "acc_token": 0.6588, + "epoch": 0.0, + "loss_char": 0.8186, + "loss_token": 1.8209, + "lr": "2.00e-07", + "norm": 6.1657, + "step": 121 + }, + { + "acc_char": 0.3114, + "acc_token": 0.6917, + "epoch": 0.0, + "loss_char": 0.7182, + "loss_token": 1.6419, + "lr": "2.00e-07", + "norm": 5.763, + "step": 122 + }, + { + "acc_char": 0.3019, + "acc_token": 0.693, + "epoch": 0.0, + "loss_char": 0.6769, + "loss_token": 1.5495, + "lr": "2.00e-07", + "norm": 5.6592, + "step": 123 + }, + { + "acc_char": 0.2885, + "acc_token": 0.6628, + "epoch": 0.0, + "loss_char": 0.7731, + "loss_token": 1.6606, + "lr": "2.00e-07", + "norm": 5.8882, + "step": 124 + }, + { + "acc_char": 0.3051, + "acc_token": 0.6748, + "epoch": 0.0, + "loss_char": 0.7521, + "loss_token": 1.7262, + "lr": "2.00e-07", + "norm": 5.8277, + "step": 125 + }, + { + "acc_char": 0.2974, + "acc_token": 0.6753, + "epoch": 0.0, + "loss_char": 0.7267, + "loss_token": 1.6828, + "lr": "2.00e-07", + "norm": 5.8624, + "step": 126 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6713, + "epoch": 0.0, + "loss_char": 0.7441, + "loss_token": 1.6326, + "lr": "2.00e-07", + "norm": 5.6445, + "step": 127 + }, + { + "acc_char": 0.3151, + "acc_token": 0.7024, + "epoch": 0.0, + "loss_char": 0.7068, + "loss_token": 1.5497, + "lr": "2.00e-07", + "norm": 5.731, + "step": 128 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6809, + "epoch": 0.0, + "loss_char": 0.7264, + "loss_token": 1.654, + "lr": "2.00e-07", + "norm": 5.6777, + "step": 129 + }, + { + "acc_char": 0.3074, + "acc_token": 0.7082, + "epoch": 0.0, + "loss_char": 0.6399, + "loss_token": 1.5027, + "lr": "2.00e-07", + "norm": 5.864, + "step": 130 + }, + { + "acc_char": 0.2976, + "acc_token": 0.667, + "epoch": 0.0, + "loss_char": 0.7645, + "loss_token": 1.663, + "lr": "2.00e-07", + "norm": 6.123, + "step": 131 + }, + { + "acc_char": 0.3146, + "acc_token": 0.703, + "epoch": 0.0, + "loss_char": 0.6845, + "loss_token": 1.5722, + "lr": "2.00e-07", + "norm": 5.6312, + "step": 132 + }, + { + "acc_char": 0.3033, + "acc_token": 0.684, + "epoch": 0.0, + "loss_char": 0.7349, + "loss_token": 1.6021, + "lr": "2.00e-07", + "norm": 5.604, + "step": 133 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6914, + "epoch": 0.0, + "loss_char": 0.7159, + "loss_token": 1.627, + "lr": "2.00e-07", + "norm": 5.8821, + "step": 134 + }, + { + "acc_char": 0.301, + "acc_token": 0.6744, + "epoch": 0.0, + "loss_char": 0.7437, + "loss_token": 1.6091, + "lr": "2.00e-07", + "norm": 5.7012, + "step": 135 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6793, + "epoch": 0.0, + "loss_char": 0.7243, + "loss_token": 1.6167, + "lr": "2.00e-07", + "norm": 5.7057, + "step": 136 + }, + { + "acc_char": 0.322, + "acc_token": 0.7129, + "epoch": 0.0, + "loss_char": 0.6653, + "loss_token": 1.5636, + "lr": "2.00e-07", + "norm": 5.4494, + "step": 137 + }, + { + "acc_char": 0.2882, + "acc_token": 0.6662, + "epoch": 0.0, + "loss_char": 0.7417, + "loss_token": 1.6943, + "lr": "2.00e-07", + "norm": 5.88, + "step": 138 + }, + { + "acc_char": 0.2908, + "acc_token": 0.6602, + "epoch": 0.0, + "loss_char": 0.7742, + "loss_token": 1.7059, + "lr": "2.00e-07", + "norm": 5.6887, + "step": 139 + }, + { + "acc_char": 0.2988, + "acc_token": 0.6609, + "epoch": 0.0, + "loss_char": 0.8141, + "loss_token": 1.6664, + "lr": "2.00e-07", + "norm": 5.6956, + "step": 140 + }, + { + "acc_char": 0.3026, + "acc_token": 0.6739, + "epoch": 0.0, + "loss_char": 0.7646, + "loss_token": 1.7001, + "lr": "2.00e-07", + "norm": 5.7097, + "step": 141 + }, + { + "acc_char": 0.2945, + "acc_token": 0.6707, + "epoch": 0.0, + "loss_char": 0.7792, + "loss_token": 1.7446, + "lr": "2.00e-07", + "norm": 6.1457, + "step": 142 + }, + { + "acc_char": 0.298, + "acc_token": 0.6815, + "epoch": 0.0, + "loss_char": 0.7148, + "loss_token": 1.6377, + "lr": "2.00e-07", + "norm": 5.5933, + "step": 143 + }, + { + "acc_char": 0.3128, + "acc_token": 0.714, + "epoch": 0.0, + "loss_char": 0.6391, + "loss_token": 1.5338, + "lr": "2.00e-07", + "norm": 5.5648, + "step": 144 + }, + { + "acc_char": 0.3061, + "acc_token": 0.6973, + "epoch": 0.0, + "loss_char": 0.6774, + "loss_token": 1.5908, + "lr": "2.00e-07", + "norm": 5.6272, + "step": 145 + }, + { + "acc_char": 0.3008, + "acc_token": 0.6728, + "epoch": 0.0, + "loss_char": 0.7721, + "loss_token": 1.6977, + "lr": "2.00e-07", + "norm": 6.0004, + "step": 146 + }, + { + "acc_char": 0.2837, + "acc_token": 0.6515, + "epoch": 0.0, + "loss_char": 0.8178, + "loss_token": 1.7521, + "lr": "2.00e-07", + "norm": 5.9622, + "step": 147 + }, + { + "acc_char": 0.2911, + "acc_token": 0.6604, + "epoch": 0.0, + "loss_char": 0.8271, + "loss_token": 1.6787, + "lr": "2.00e-07", + "norm": 5.9203, + "step": 148 + }, + { + "acc_char": 0.2951, + "acc_token": 0.6641, + "epoch": 0.0, + "loss_char": 0.7608, + "loss_token": 1.7659, + "lr": "2.00e-07", + "norm": 5.8751, + "step": 149 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6759, + "epoch": 0.0, + "loss_char": 0.7553, + "loss_token": 1.6277, + "lr": "2.00e-07", + "norm": 5.7304, + "step": 150 + }, + { + "acc_char": 0.3175, + "acc_token": 0.7099, + "epoch": 0.0, + "loss_char": 0.6529, + "loss_token": 1.5657, + "lr": "2.00e-07", + "norm": 5.6377, + "step": 151 + }, + { + "acc_char": 0.3109, + "acc_token": 0.6978, + "epoch": 0.0, + "loss_char": 0.6993, + "loss_token": 1.6537, + "lr": "2.00e-07", + "norm": 5.6356, + "step": 152 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6638, + "epoch": 0.0, + "loss_char": 0.7891, + "loss_token": 1.5923, + "lr": "2.00e-07", + "norm": 5.7508, + "step": 153 + }, + { + "acc_char": 0.3023, + "acc_token": 0.7008, + "epoch": 0.0, + "loss_char": 0.6588, + "loss_token": 1.634, + "lr": "2.00e-07", + "norm": 5.6687, + "step": 154 + }, + { + "acc_char": 0.2921, + "acc_token": 0.6568, + "epoch": 0.0, + "loss_char": 0.818, + "loss_token": 1.7285, + "lr": "2.00e-07", + "norm": 5.7942, + "step": 155 + }, + { + "acc_char": 0.3148, + "acc_token": 0.7104, + "epoch": 0.0, + "loss_char": 0.6577, + "loss_token": 1.5699, + "lr": "2.00e-07", + "norm": 5.7228, + "step": 156 + }, + { + "acc_char": 0.2811, + "acc_token": 0.6123, + "epoch": 0.0, + "loss_char": 0.9048, + "loss_token": 1.832, + "lr": "2.00e-07", + "norm": 5.4958, + "step": 157 + }, + { + "acc_char": 0.325, + "acc_token": 0.7288, + "epoch": 0.0, + "loss_char": 0.6283, + "loss_token": 1.5255, + "lr": "2.00e-07", + "norm": 5.4465, + "step": 158 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6789, + "epoch": 0.0, + "loss_char": 0.7332, + "loss_token": 1.5868, + "lr": "2.00e-07", + "norm": 5.4336, + "step": 159 + }, + { + "acc_char": 0.2975, + "acc_token": 0.6689, + "epoch": 0.0, + "loss_char": 0.7776, + "loss_token": 1.7173, + "lr": "2.00e-07", + "norm": 5.8854, + "step": 160 + }, + { + "acc_char": 0.2975, + "acc_token": 0.6693, + "epoch": 0.0, + "loss_char": 0.7565, + "loss_token": 1.7072, + "lr": "2.00e-07", + "norm": 6.249, + "step": 161 + }, + { + "acc_char": 0.2866, + "acc_token": 0.6611, + "epoch": 0.0, + "loss_char": 0.791, + "loss_token": 1.7247, + "lr": "2.00e-07", + "norm": 5.859, + "step": 162 + }, + { + "acc_char": 0.3141, + "acc_token": 0.7247, + "epoch": 0.0, + "loss_char": 0.5933, + "loss_token": 1.5428, + "lr": "2.00e-07", + "norm": 5.4944, + "step": 163 + }, + { + "acc_char": 0.294, + "acc_token": 0.6729, + "epoch": 0.0, + "loss_char": 0.7727, + "loss_token": 1.6252, + "lr": "2.00e-07", + "norm": 6.101, + "step": 164 + }, + { + "acc_char": 0.2924, + "acc_token": 0.6723, + "epoch": 0.0, + "loss_char": 0.7185, + "loss_token": 1.6621, + "lr": "2.00e-07", + "norm": 5.9544, + "step": 165 + }, + { + "acc_char": 0.3052, + "acc_token": 0.689, + "epoch": 0.0, + "loss_char": 0.7298, + "loss_token": 1.5926, + "lr": "2.00e-07", + "norm": 5.7368, + "step": 166 + }, + { + "acc_char": 0.3097, + "acc_token": 0.6985, + "epoch": 0.0, + "loss_char": 0.6821, + "loss_token": 1.6133, + "lr": "2.00e-07", + "norm": 5.7521, + "step": 167 + }, + { + "acc_char": 0.3013, + "acc_token": 0.6768, + "epoch": 0.0, + "loss_char": 0.7478, + "loss_token": 1.6652, + "lr": "2.00e-07", + "norm": 5.5591, + "step": 168 + }, + { + "acc_char": 0.2904, + "acc_token": 0.6626, + "epoch": 0.0, + "loss_char": 0.7737, + "loss_token": 1.6033, + "lr": "2.00e-07", + "norm": 5.6183, + "step": 169 + }, + { + "acc_char": 0.2984, + "acc_token": 0.6925, + "epoch": 0.0, + "loss_char": 0.6733, + "loss_token": 1.5176, + "lr": "2.00e-07", + "norm": 5.4951, + "step": 170 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6942, + "epoch": 0.0, + "loss_char": 0.6754, + "loss_token": 1.6154, + "lr": "2.00e-07", + "norm": 5.7609, + "step": 171 + }, + { + "acc_char": 0.3067, + "acc_token": 0.6891, + "epoch": 0.0, + "loss_char": 0.7122, + "loss_token": 1.6741, + "lr": "2.00e-07", + "norm": 5.5804, + "step": 172 + }, + { + "acc_char": 0.2927, + "acc_token": 0.6688, + "epoch": 0.0, + "loss_char": 0.7944, + "loss_token": 1.7406, + "lr": "2.00e-07", + "norm": 5.8865, + "step": 173 + }, + { + "acc_char": 0.2974, + "acc_token": 0.6814, + "epoch": 0.0, + "loss_char": 0.7164, + "loss_token": 1.6779, + "lr": "2.00e-07", + "norm": 5.9016, + "step": 174 + }, + { + "acc_char": 0.3147, + "acc_token": 0.7079, + "epoch": 0.0, + "loss_char": 0.6844, + "loss_token": 1.6573, + "lr": "2.00e-07", + "norm": 5.9278, + "step": 175 + }, + { + "acc_char": 0.3126, + "acc_token": 0.7029, + "epoch": 0.0, + "loss_char": 0.6736, + "loss_token": 1.615, + "lr": "2.00e-07", + "norm": 5.5758, + "step": 176 + }, + { + "acc_char": 0.2953, + "acc_token": 0.6622, + "epoch": 0.0, + "loss_char": 0.7834, + "loss_token": 1.7336, + "lr": "2.00e-07", + "norm": 5.8171, + "step": 177 + }, + { + "acc_char": 0.2839, + "acc_token": 0.6287, + "epoch": 0.0, + "loss_char": 0.8726, + "loss_token": 1.7605, + "lr": "2.00e-07", + "norm": 5.4992, + "step": 178 + }, + { + "acc_char": 0.2985, + "acc_token": 0.6735, + "epoch": 0.0, + "loss_char": 0.7581, + "loss_token": 1.6939, + "lr": "2.00e-07", + "norm": 5.8877, + "step": 179 + }, + { + "acc_char": 0.3101, + "acc_token": 0.6953, + "epoch": 0.0, + "loss_char": 0.6897, + "loss_token": 1.5523, + "lr": "2.00e-07", + "norm": 5.3722, + "step": 180 + }, + { + "acc_char": 0.3193, + "acc_token": 0.6913, + "epoch": 0.0, + "loss_char": 0.6644, + "loss_token": 1.6021, + "lr": "2.00e-07", + "norm": 5.3646, + "step": 181 + }, + { + "acc_char": 0.3162, + "acc_token": 0.7104, + "epoch": 0.0, + "loss_char": 0.6554, + "loss_token": 1.6126, + "lr": "2.00e-07", + "norm": 5.4689, + "step": 182 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6559, + "epoch": 0.0, + "loss_char": 0.7793, + "loss_token": 1.7622, + "lr": "2.00e-07", + "norm": 5.4834, + "step": 183 + }, + { + "acc_char": 0.3046, + "acc_token": 0.6817, + "epoch": 0.0, + "loss_char": 0.7293, + "loss_token": 1.6286, + "lr": "2.00e-07", + "norm": 5.63, + "step": 184 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6837, + "epoch": 0.0, + "loss_char": 0.6733, + "loss_token": 1.6232, + "lr": "2.00e-07", + "norm": 5.4997, + "step": 185 + }, + { + "acc_char": 0.3174, + "acc_token": 0.7007, + "epoch": 0.0, + "loss_char": 0.6977, + "loss_token": 1.5801, + "lr": "2.00e-07", + "norm": 5.533, + "step": 186 + }, + { + "acc_char": 0.2927, + "acc_token": 0.6678, + "epoch": 0.0, + "loss_char": 0.7497, + "loss_token": 1.6652, + "lr": "2.00e-07", + "norm": 5.7318, + "step": 187 + }, + { + "acc_char": 0.2958, + "acc_token": 0.6793, + "epoch": 0.0, + "loss_char": 0.7117, + "loss_token": 1.6247, + "lr": "2.00e-07", + "norm": 5.558, + "step": 188 + }, + { + "acc_char": 0.299, + "acc_token": 0.6745, + "epoch": 0.0, + "loss_char": 0.7525, + "loss_token": 1.6932, + "lr": "2.00e-07", + "norm": 5.8052, + "step": 189 + }, + { + "acc_char": 0.3185, + "acc_token": 0.7051, + "epoch": 0.0, + "loss_char": 0.6852, + "loss_token": 1.5308, + "lr": "2.00e-07", + "norm": 5.7116, + "step": 190 + }, + { + "acc_char": 0.2858, + "acc_token": 0.6439, + "epoch": 0.0, + "loss_char": 0.8477, + "loss_token": 1.7375, + "lr": "2.00e-07", + "norm": 5.8329, + "step": 191 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6782, + "epoch": 0.0, + "loss_char": 0.7316, + "loss_token": 1.6276, + "lr": "2.00e-07", + "norm": 5.6723, + "step": 192 + }, + { + "acc_char": 0.296, + "acc_token": 0.6798, + "epoch": 0.0, + "loss_char": 0.7162, + "loss_token": 1.6456, + "lr": "2.00e-07", + "norm": 5.5331, + "step": 193 + }, + { + "acc_char": 0.3012, + "acc_token": 0.6737, + "epoch": 0.0, + "loss_char": 0.753, + "loss_token": 1.6082, + "lr": "2.00e-07", + "norm": 5.8935, + "step": 194 + }, + { + "acc_char": 0.2792, + "acc_token": 0.6171, + "epoch": 0.0, + "loss_char": 0.8842, + "loss_token": 1.8427, + "lr": "2.00e-07", + "norm": 5.3946, + "step": 195 + }, + { + "acc_char": 0.2965, + "acc_token": 0.6732, + "epoch": 0.0, + "loss_char": 0.7525, + "loss_token": 1.6317, + "lr": "2.00e-07", + "norm": 5.5915, + "step": 196 + }, + { + "acc_char": 0.296, + "acc_token": 0.6791, + "epoch": 0.0, + "loss_char": 0.7122, + "loss_token": 1.6854, + "lr": "2.00e-07", + "norm": 5.656, + "step": 197 + }, + { + "acc_char": 0.3108, + "acc_token": 0.6935, + "epoch": 0.0, + "loss_char": 0.7203, + "loss_token": 1.6353, + "lr": "2.00e-07", + "norm": 5.8489, + "step": 198 + }, + { + "acc_char": 0.3209, + "acc_token": 0.6995, + "epoch": 0.0, + "loss_char": 0.6958, + "loss_token": 1.5762, + "lr": "2.00e-07", + "norm": 5.6491, + "step": 199 + }, + { + "acc_char": 0.2956, + "acc_token": 0.6688, + "epoch": 0.0, + "loss_char": 0.7339, + "loss_token": 1.6059, + "lr": "2.00e-07", + "norm": 5.7993, + "step": 200 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6711, + "epoch": 0.0, + "loss_char": 0.7747, + "loss_token": 1.6607, + "lr": "2.00e-07", + "norm": 5.8025, + "step": 201 + }, + { + "acc_char": 0.3159, + "acc_token": 0.7115, + "epoch": 0.0, + "loss_char": 0.6561, + "loss_token": 1.5915, + "lr": "2.00e-07", + "norm": 5.5739, + "step": 202 + }, + { + "acc_char": 0.314, + "acc_token": 0.6954, + "epoch": 0.0, + "loss_char": 0.7071, + "loss_token": 1.5973, + "lr": "2.00e-07", + "norm": 5.7454, + "step": 203 + }, + { + "acc_char": 0.2964, + "acc_token": 0.6685, + "epoch": 0.0, + "loss_char": 0.7321, + "loss_token": 1.6484, + "lr": "2.00e-07", + "norm": 6.565, + "step": 204 + }, + { + "acc_char": 0.2827, + "acc_token": 0.6502, + "epoch": 0.0, + "loss_char": 0.7909, + "loss_token": 1.7053, + "lr": "2.00e-07", + "norm": 5.8039, + "step": 205 + }, + { + "acc_char": 0.3067, + "acc_token": 0.7046, + "epoch": 0.0, + "loss_char": 0.6551, + "loss_token": 1.5926, + "lr": "2.00e-07", + "norm": 5.8913, + "step": 206 + }, + { + "acc_char": 0.3069, + "acc_token": 0.6929, + "epoch": 0.0, + "loss_char": 0.6949, + "loss_token": 1.575, + "lr": "2.00e-07", + "norm": 5.4935, + "step": 207 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6792, + "epoch": 0.0, + "loss_char": 0.7197, + "loss_token": 1.6739, + "lr": "2.00e-07", + "norm": 5.7478, + "step": 208 + }, + { + "acc_char": 0.3042, + "acc_token": 0.6711, + "epoch": 0.0, + "loss_char": 0.7623, + "loss_token": 1.6577, + "lr": "2.00e-07", + "norm": 5.6784, + "step": 209 + }, + { + "acc_char": 0.2935, + "acc_token": 0.6707, + "epoch": 0.0, + "loss_char": 0.7702, + "loss_token": 1.6913, + "lr": "2.00e-07", + "norm": 5.8799, + "step": 210 + }, + { + "acc_char": 0.3207, + "acc_token": 0.7217, + "epoch": 0.0, + "loss_char": 0.6361, + "loss_token": 1.5228, + "lr": "2.00e-07", + "norm": 5.6157, + "step": 211 + }, + { + "acc_char": 0.297, + "acc_token": 0.6767, + "epoch": 0.0, + "loss_char": 0.7229, + "loss_token": 1.5723, + "lr": "2.00e-07", + "norm": 5.5983, + "step": 212 + }, + { + "acc_char": 0.3048, + "acc_token": 0.6875, + "epoch": 0.0, + "loss_char": 0.7076, + "loss_token": 1.6463, + "lr": "2.00e-07", + "norm": 5.891, + "step": 213 + }, + { + "acc_char": 0.3108, + "acc_token": 0.7008, + "epoch": 0.0, + "loss_char": 0.6808, + "loss_token": 1.6064, + "lr": "2.00e-07", + "norm": 5.5738, + "step": 214 + }, + { + "acc_char": 0.3038, + "acc_token": 0.684, + "epoch": 0.0, + "loss_char": 0.7254, + "loss_token": 1.6527, + "lr": "2.00e-07", + "norm": 5.8218, + "step": 215 + }, + { + "acc_char": 0.2914, + "acc_token": 0.6668, + "epoch": 0.0, + "loss_char": 0.7661, + "loss_token": 1.6431, + "lr": "2.00e-07", + "norm": 5.4766, + "step": 216 + }, + { + "acc_char": 0.3127, + "acc_token": 0.703, + "epoch": 0.0, + "loss_char": 0.6885, + "loss_token": 1.6306, + "lr": "2.00e-07", + "norm": 5.6474, + "step": 217 + }, + { + "acc_char": 0.2957, + "acc_token": 0.662, + "epoch": 0.0, + "loss_char": 0.7833, + "loss_token": 1.6566, + "lr": "2.00e-07", + "norm": 7.2402, + "step": 218 + }, + { + "acc_char": 0.3161, + "acc_token": 0.6974, + "epoch": 0.0, + "loss_char": 0.6961, + "loss_token": 1.5986, + "lr": "2.00e-07", + "norm": 6.046, + "step": 219 + }, + { + "acc_char": 0.2773, + "acc_token": 0.6116, + "epoch": 0.0, + "loss_char": 0.8903, + "loss_token": 1.7691, + "lr": "2.00e-07", + "norm": 5.8258, + "step": 220 + }, + { + "acc_char": 0.3011, + "acc_token": 0.6851, + "epoch": 0.0, + "loss_char": 0.7189, + "loss_token": 1.6715, + "lr": "2.00e-07", + "norm": 5.7991, + "step": 221 + }, + { + "acc_char": 0.2985, + "acc_token": 0.6706, + "epoch": 0.0, + "loss_char": 0.7537, + "loss_token": 1.657, + "lr": "2.00e-07", + "norm": 5.9313, + "step": 222 + }, + { + "acc_char": 0.2766, + "acc_token": 0.6166, + "epoch": 0.0, + "loss_char": 0.8881, + "loss_token": 1.8768, + "lr": "2.00e-07", + "norm": 5.6931, + "step": 223 + }, + { + "acc_char": 0.2935, + "acc_token": 0.6561, + "epoch": 0.0, + "loss_char": 0.7996, + "loss_token": 1.6949, + "lr": "2.00e-07", + "norm": 6.053, + "step": 224 + }, + { + "acc_char": 0.305, + "acc_token": 0.6896, + "epoch": 0.0, + "loss_char": 0.7181, + "loss_token": 1.6583, + "lr": "2.00e-07", + "norm": 5.6742, + "step": 225 + }, + { + "acc_char": 0.2892, + "acc_token": 0.6651, + "epoch": 0.0, + "loss_char": 0.7748, + "loss_token": 1.681, + "lr": "2.00e-07", + "norm": 5.7138, + "step": 226 + }, + { + "acc_char": 0.282, + "acc_token": 0.6174, + "epoch": 0.0, + "loss_char": 0.9016, + "loss_token": 1.7903, + "lr": "2.00e-07", + "norm": 5.938, + "step": 227 + }, + { + "acc_char": 0.2972, + "acc_token": 0.6766, + "epoch": 0.0, + "loss_char": 0.7571, + "loss_token": 1.7066, + "lr": "2.00e-07", + "norm": 5.8829, + "step": 228 + }, + { + "acc_char": 0.3053, + "acc_token": 0.6894, + "epoch": 0.0, + "loss_char": 0.7142, + "loss_token": 1.6113, + "lr": "2.00e-07", + "norm": 5.8733, + "step": 229 + }, + { + "acc_char": 0.2838, + "acc_token": 0.656, + "epoch": 0.0, + "loss_char": 0.7629, + "loss_token": 1.7732, + "lr": "2.00e-07", + "norm": 6.0202, + "step": 230 + }, + { + "acc_char": 0.3108, + "acc_token": 0.7016, + "epoch": 0.0, + "loss_char": 0.6778, + "loss_token": 1.6731, + "lr": "2.00e-07", + "norm": 5.672, + "step": 231 + }, + { + "acc_char": 0.308, + "acc_token": 0.703, + "epoch": 0.0, + "loss_char": 0.6667, + "loss_token": 1.5779, + "lr": "2.00e-07", + "norm": 5.6096, + "step": 232 + }, + { + "acc_char": 0.304, + "acc_token": 0.6848, + "epoch": 0.0, + "loss_char": 0.7392, + "loss_token": 1.6087, + "lr": "2.00e-07", + "norm": 5.634, + "step": 233 + }, + { + "acc_char": 0.3052, + "acc_token": 0.689, + "epoch": 0.0, + "loss_char": 0.6903, + "loss_token": 1.6823, + "lr": "2.00e-07", + "norm": 5.6758, + "step": 234 + }, + { + "acc_char": 0.3041, + "acc_token": 0.6942, + "epoch": 0.0, + "loss_char": 0.6867, + "loss_token": 1.5694, + "lr": "2.00e-07", + "norm": 5.8105, + "step": 235 + }, + { + "acc_char": 0.2992, + "acc_token": 0.6725, + "epoch": 0.0, + "loss_char": 0.7675, + "loss_token": 1.8046, + "lr": "2.00e-07", + "norm": 5.8105, + "step": 236 + }, + { + "acc_char": 0.3097, + "acc_token": 0.6881, + "epoch": 0.0, + "loss_char": 0.7139, + "loss_token": 1.6435, + "lr": "2.00e-07", + "norm": 5.7911, + "step": 237 + }, + { + "acc_char": 0.2896, + "acc_token": 0.6658, + "epoch": 0.0, + "loss_char": 0.7611, + "loss_token": 1.6792, + "lr": "2.00e-07", + "norm": 5.6335, + "step": 238 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6747, + "epoch": 0.0, + "loss_char": 0.7615, + "loss_token": 1.6733, + "lr": "2.00e-07", + "norm": 5.9079, + "step": 239 + }, + { + "acc_char": 0.302, + "acc_token": 0.6832, + "epoch": 0.0, + "loss_char": 0.7202, + "loss_token": 1.6677, + "lr": "2.00e-07", + "norm": 5.9424, + "step": 240 + }, + { + "acc_char": 0.302, + "acc_token": 0.6742, + "epoch": 0.0, + "loss_char": 0.7599, + "loss_token": 1.6017, + "lr": "2.00e-07", + "norm": 6.373, + "step": 241 + }, + { + "acc_char": 0.3049, + "acc_token": 0.7066, + "epoch": 0.0, + "loss_char": 0.6629, + "loss_token": 1.6348, + "lr": "2.00e-07", + "norm": 5.6642, + "step": 242 + }, + { + "acc_char": 0.291, + "acc_token": 0.6323, + "epoch": 0.0, + "loss_char": 0.8309, + "loss_token": 1.6843, + "lr": "2.00e-07", + "norm": 7.0208, + "step": 243 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6944, + "epoch": 0.0, + "loss_char": 0.6775, + "loss_token": 1.6127, + "lr": "2.00e-07", + "norm": 5.8339, + "step": 244 + }, + { + "acc_char": 0.2927, + "acc_token": 0.6734, + "epoch": 0.0, + "loss_char": 0.7555, + "loss_token": 1.7228, + "lr": "2.00e-07", + "norm": 5.8693, + "step": 245 + }, + { + "acc_char": 0.3026, + "acc_token": 0.6829, + "epoch": 0.0, + "loss_char": 0.7342, + "loss_token": 1.6397, + "lr": "2.00e-07", + "norm": 5.6664, + "step": 246 + }, + { + "acc_char": 0.3078, + "acc_token": 0.6956, + "epoch": 0.0, + "loss_char": 0.7104, + "loss_token": 1.713, + "lr": "2.00e-07", + "norm": 6.3569, + "step": 247 + }, + { + "acc_char": 0.2887, + "acc_token": 0.6516, + "epoch": 0.0, + "loss_char": 0.8043, + "loss_token": 1.7693, + "lr": "2.00e-07", + "norm": 6.1627, + "step": 248 + }, + { + "acc_char": 0.315, + "acc_token": 0.7136, + "epoch": 0.0, + "loss_char": 0.6514, + "loss_token": 1.5165, + "lr": "2.00e-07", + "norm": 5.6597, + "step": 249 + }, + { + "acc_char": 0.3168, + "acc_token": 0.7127, + "epoch": 0.0, + "loss_char": 0.6437, + "loss_token": 1.5805, + "lr": "2.00e-07", + "norm": 5.6713, + "step": 250 + }, + { + "acc_char": 0.3089, + "acc_token": 0.7065, + "epoch": 0.0, + "loss_char": 0.6508, + "loss_token": 1.5944, + "lr": "2.00e-07", + "norm": 5.5517, + "step": 251 + }, + { + "acc_char": 0.3234, + "acc_token": 0.7193, + "epoch": 0.0, + "loss_char": 0.6507, + "loss_token": 1.5453, + "lr": "2.00e-07", + "norm": 5.8191, + "step": 252 + }, + { + "acc_char": 0.2985, + "acc_token": 0.6924, + "epoch": 0.0, + "loss_char": 0.6841, + "loss_token": 1.6334, + "lr": "2.00e-07", + "norm": 5.5597, + "step": 253 + }, + { + "acc_char": 0.3017, + "acc_token": 0.6705, + "epoch": 0.0, + "loss_char": 0.7677, + "loss_token": 1.5948, + "lr": "2.00e-07", + "norm": 6.1439, + "step": 254 + }, + { + "acc_char": 0.3057, + "acc_token": 0.6895, + "epoch": 0.0, + "loss_char": 0.7055, + "loss_token": 1.5337, + "lr": "2.00e-07", + "norm": 5.4335, + "step": 255 + }, + { + "acc_char": 0.2927, + "acc_token": 0.6691, + "epoch": 0.0, + "loss_char": 0.7783, + "loss_token": 1.6823, + "lr": "2.00e-07", + "norm": 5.8874, + "step": 256 + }, + { + "acc_char": 0.3109, + "acc_token": 0.6977, + "epoch": 0.0, + "loss_char": 0.7025, + "loss_token": 1.522, + "lr": "2.00e-07", + "norm": 5.4743, + "step": 257 + }, + { + "acc_char": 0.2988, + "acc_token": 0.6697, + "epoch": 0.0, + "loss_char": 0.7659, + "loss_token": 1.6629, + "lr": "2.00e-07", + "norm": 5.773, + "step": 258 + }, + { + "acc_char": 0.3034, + "acc_token": 0.6779, + "epoch": 0.0, + "loss_char": 0.749, + "loss_token": 1.5817, + "lr": "2.00e-07", + "norm": 5.7499, + "step": 259 + }, + { + "acc_char": 0.3071, + "acc_token": 0.7046, + "epoch": 0.0, + "loss_char": 0.6483, + "loss_token": 1.5455, + "lr": "2.00e-07", + "norm": 5.4589, + "step": 260 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6709, + "epoch": 0.0, + "loss_char": 0.7631, + "loss_token": 1.7526, + "lr": "2.00e-07", + "norm": 5.8391, + "step": 261 + }, + { + "acc_char": 0.2992, + "acc_token": 0.6824, + "epoch": 0.0, + "loss_char": 0.7006, + "loss_token": 1.6149, + "lr": "2.00e-07", + "norm": 5.4798, + "step": 262 + }, + { + "acc_char": 0.2893, + "acc_token": 0.673, + "epoch": 0.0, + "loss_char": 0.7407, + "loss_token": 1.6473, + "lr": "2.00e-07", + "norm": 6.0615, + "step": 263 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6681, + "epoch": 0.0, + "loss_char": 0.7942, + "loss_token": 1.6474, + "lr": "2.00e-07", + "norm": 6.1056, + "step": 264 + }, + { + "acc_char": 0.3208, + "acc_token": 0.7085, + "epoch": 0.0, + "loss_char": 0.6816, + "loss_token": 1.6371, + "lr": "2.00e-07", + "norm": 5.5811, + "step": 265 + }, + { + "acc_char": 0.2961, + "acc_token": 0.6715, + "epoch": 0.0, + "loss_char": 0.736, + "loss_token": 1.7145, + "lr": "2.00e-07", + "norm": 5.7918, + "step": 266 + }, + { + "acc_char": 0.3118, + "acc_token": 0.6992, + "epoch": 0.0, + "loss_char": 0.6895, + "loss_token": 1.5566, + "lr": "2.00e-07", + "norm": 5.6788, + "step": 267 + }, + { + "acc_char": 0.3006, + "acc_token": 0.6892, + "epoch": 0.0, + "loss_char": 0.7024, + "loss_token": 1.6323, + "lr": "2.00e-07", + "norm": 5.6619, + "step": 268 + }, + { + "acc_char": 0.3243, + "acc_token": 0.7098, + "epoch": 0.0, + "loss_char": 0.6818, + "loss_token": 1.3966, + "lr": "2.00e-07", + "norm": 5.6444, + "step": 269 + }, + { + "acc_char": 0.2971, + "acc_token": 0.6789, + "epoch": 0.0, + "loss_char": 0.7399, + "loss_token": 1.6553, + "lr": "2.00e-07", + "norm": 5.9755, + "step": 270 + }, + { + "acc_char": 0.3015, + "acc_token": 0.687, + "epoch": 0.0, + "loss_char": 0.7078, + "loss_token": 1.6638, + "lr": "2.00e-07", + "norm": 5.673, + "step": 271 + }, + { + "acc_char": 0.3144, + "acc_token": 0.7131, + "epoch": 0.0, + "loss_char": 0.6484, + "loss_token": 1.5826, + "lr": "2.00e-07", + "norm": 5.4967, + "step": 272 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6729, + "epoch": 0.0, + "loss_char": 0.744, + "loss_token": 1.6038, + "lr": "2.00e-07", + "norm": 6.0506, + "step": 273 + }, + { + "acc_char": 0.3145, + "acc_token": 0.7126, + "epoch": 0.01, + "loss_char": 0.6452, + "loss_token": 1.5216, + "lr": "2.00e-07", + "norm": 5.404, + "step": 274 + }, + { + "acc_char": 0.3076, + "acc_token": 0.696, + "epoch": 0.01, + "loss_char": 0.6986, + "loss_token": 1.5858, + "lr": "2.00e-07", + "norm": 5.7001, + "step": 275 + }, + { + "acc_char": 0.2755, + "acc_token": 0.6011, + "epoch": 0.01, + "loss_char": 0.9312, + "loss_token": 1.6963, + "lr": "2.00e-07", + "norm": 5.6149, + "step": 276 + }, + { + "acc_char": 0.3184, + "acc_token": 0.7168, + "epoch": 0.01, + "loss_char": 0.6472, + "loss_token": 1.5169, + "lr": "2.00e-07", + "norm": 5.729, + "step": 277 + }, + { + "acc_char": 0.2988, + "acc_token": 0.6767, + "epoch": 0.01, + "loss_char": 0.7273, + "loss_token": 1.6534, + "lr": "2.00e-07", + "norm": 5.7457, + "step": 278 + }, + { + "acc_char": 0.3217, + "acc_token": 0.7216, + "epoch": 0.01, + "loss_char": 0.6424, + "loss_token": 1.632, + "lr": "2.00e-07", + "norm": 5.6512, + "step": 279 + }, + { + "acc_char": 0.3261, + "acc_token": 0.7308, + "epoch": 0.01, + "loss_char": 0.631, + "loss_token": 1.5274, + "lr": "2.00e-07", + "norm": 5.8094, + "step": 280 + }, + { + "acc_char": 0.3047, + "acc_token": 0.6862, + "epoch": 0.01, + "loss_char": 0.7273, + "loss_token": 1.6198, + "lr": "2.00e-07", + "norm": 5.5336, + "step": 281 + }, + { + "acc_char": 0.306, + "acc_token": 0.6835, + "epoch": 0.01, + "loss_char": 0.7451, + "loss_token": 1.6363, + "lr": "2.00e-07", + "norm": 5.6988, + "step": 282 + }, + { + "acc_char": 0.3034, + "acc_token": 0.6924, + "epoch": 0.01, + "loss_char": 0.6886, + "loss_token": 1.5916, + "lr": "2.00e-07", + "norm": 5.468, + "step": 283 + }, + { + "acc_char": 0.3107, + "acc_token": 0.692, + "epoch": 0.01, + "loss_char": 0.7125, + "loss_token": 1.6719, + "lr": "2.00e-07", + "norm": 5.9679, + "step": 284 + }, + { + "acc_char": 0.2917, + "acc_token": 0.6532, + "epoch": 0.01, + "loss_char": 0.8194, + "loss_token": 1.7125, + "lr": "2.00e-07", + "norm": 6.0751, + "step": 285 + }, + { + "acc_char": 0.3283, + "acc_token": 0.7289, + "epoch": 0.01, + "loss_char": 0.6037, + "loss_token": 1.4546, + "lr": "2.00e-07", + "norm": 5.5938, + "step": 286 + }, + { + "acc_char": 0.3191, + "acc_token": 0.7103, + "epoch": 0.01, + "loss_char": 0.6685, + "loss_token": 1.5951, + "lr": "2.00e-07", + "norm": 5.5813, + "step": 287 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6766, + "epoch": 0.01, + "loss_char": 0.7287, + "loss_token": 1.6345, + "lr": "2.00e-07", + "norm": 5.9596, + "step": 288 + }, + { + "acc_char": 0.2962, + "acc_token": 0.6717, + "epoch": 0.01, + "loss_char": 0.7433, + "loss_token": 1.6893, + "lr": "2.00e-07", + "norm": 5.7588, + "step": 289 + }, + { + "acc_char": 0.3013, + "acc_token": 0.6765, + "epoch": 0.01, + "loss_char": 0.7459, + "loss_token": 1.6535, + "lr": "2.00e-07", + "norm": 5.8478, + "step": 290 + }, + { + "acc_char": 0.3181, + "acc_token": 0.7165, + "epoch": 0.01, + "loss_char": 0.64, + "loss_token": 1.5437, + "lr": "2.00e-07", + "norm": 5.387, + "step": 291 + }, + { + "acc_char": 0.2807, + "acc_token": 0.6105, + "epoch": 0.01, + "loss_char": 0.8958, + "loss_token": 1.8091, + "lr": "2.00e-07", + "norm": 5.8084, + "step": 292 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6883, + "epoch": 0.01, + "loss_char": 0.6985, + "loss_token": 1.5708, + "lr": "2.00e-07", + "norm": 5.5178, + "step": 293 + }, + { + "acc_char": 0.3006, + "acc_token": 0.6808, + "epoch": 0.01, + "loss_char": 0.7287, + "loss_token": 1.6012, + "lr": "2.00e-07", + "norm": 5.6108, + "step": 294 + }, + { + "acc_char": 0.2975, + "acc_token": 0.671, + "epoch": 0.01, + "loss_char": 0.7498, + "loss_token": 1.7116, + "lr": "2.00e-07", + "norm": 5.4779, + "step": 295 + }, + { + "acc_char": 0.3121, + "acc_token": 0.7031, + "epoch": 0.01, + "loss_char": 0.6763, + "loss_token": 1.5036, + "lr": "2.00e-07", + "norm": 5.49, + "step": 296 + }, + { + "acc_char": 0.3, + "acc_token": 0.6808, + "epoch": 0.01, + "loss_char": 0.7139, + "loss_token": 1.6118, + "lr": "2.00e-07", + "norm": 5.3666, + "step": 297 + }, + { + "acc_char": 0.2984, + "acc_token": 0.6774, + "epoch": 0.01, + "loss_char": 0.7645, + "loss_token": 1.6297, + "lr": "2.00e-07", + "norm": 5.8568, + "step": 298 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6744, + "epoch": 0.01, + "loss_char": 0.7564, + "loss_token": 1.5995, + "lr": "2.00e-07", + "norm": 5.9268, + "step": 299 + }, + { + "acc_char": 0.3044, + "acc_token": 0.6908, + "epoch": 0.01, + "loss_char": 0.7074, + "loss_token": 1.5607, + "lr": "2.00e-07", + "norm": 5.5007, + "step": 300 + }, + { + "acc_char": 0.3061, + "acc_token": 0.6878, + "epoch": 0.01, + "loss_char": 0.7224, + "loss_token": 1.6879, + "lr": "2.00e-07", + "norm": 5.6493, + "step": 301 + }, + { + "acc_char": 0.2944, + "acc_token": 0.6705, + "epoch": 0.01, + "loss_char": 0.7456, + "loss_token": 1.609, + "lr": "2.00e-07", + "norm": 5.7389, + "step": 302 + }, + { + "acc_char": 0.2968, + "acc_token": 0.6759, + "epoch": 0.01, + "loss_char": 0.7284, + "loss_token": 1.6731, + "lr": "2.00e-07", + "norm": 5.523, + "step": 303 + }, + { + "acc_char": 0.2979, + "acc_token": 0.6824, + "epoch": 0.01, + "loss_char": 0.7128, + "loss_token": 1.5829, + "lr": "2.00e-07", + "norm": 5.5804, + "step": 304 + }, + { + "acc_char": 0.2975, + "acc_token": 0.6765, + "epoch": 0.01, + "loss_char": 0.7475, + "loss_token": 1.6629, + "lr": "2.00e-07", + "norm": 5.8821, + "step": 305 + }, + { + "acc_char": 0.3019, + "acc_token": 0.6788, + "epoch": 0.01, + "loss_char": 0.7458, + "loss_token": 1.6448, + "lr": "2.00e-07", + "norm": 5.7853, + "step": 306 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6883, + "epoch": 0.01, + "loss_char": 0.7007, + "loss_token": 1.5519, + "lr": "2.00e-07", + "norm": 5.3026, + "step": 307 + }, + { + "acc_char": 0.2937, + "acc_token": 0.6658, + "epoch": 0.01, + "loss_char": 0.7739, + "loss_token": 1.7083, + "lr": "2.00e-07", + "norm": 5.9143, + "step": 308 + }, + { + "acc_char": 0.3012, + "acc_token": 0.6801, + "epoch": 0.01, + "loss_char": 0.7371, + "loss_token": 1.6619, + "lr": "2.00e-07", + "norm": 5.7714, + "step": 309 + }, + { + "acc_char": 0.306, + "acc_token": 0.6964, + "epoch": 0.01, + "loss_char": 0.681, + "loss_token": 1.656, + "lr": "2.00e-07", + "norm": 5.8231, + "step": 310 + }, + { + "acc_char": 0.3065, + "acc_token": 0.7017, + "epoch": 0.01, + "loss_char": 0.6808, + "loss_token": 1.5909, + "lr": "2.00e-07", + "norm": 5.4979, + "step": 311 + }, + { + "acc_char": 0.3129, + "acc_token": 0.6958, + "epoch": 0.01, + "loss_char": 0.7117, + "loss_token": 1.619, + "lr": "2.00e-07", + "norm": 5.7152, + "step": 312 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6706, + "epoch": 0.01, + "loss_char": 0.7657, + "loss_token": 1.6683, + "lr": "2.00e-07", + "norm": 5.6728, + "step": 313 + }, + { + "acc_char": 0.3176, + "acc_token": 0.7069, + "epoch": 0.01, + "loss_char": 0.6768, + "loss_token": 1.6478, + "lr": "2.00e-07", + "norm": 5.7274, + "step": 314 + }, + { + "acc_char": 0.2875, + "acc_token": 0.6621, + "epoch": 0.01, + "loss_char": 0.7648, + "loss_token": 1.694, + "lr": "2.00e-07", + "norm": 5.683, + "step": 315 + }, + { + "acc_char": 0.301, + "acc_token": 0.678, + "epoch": 0.01, + "loss_char": 0.7439, + "loss_token": 1.6851, + "lr": "2.00e-07", + "norm": 5.7478, + "step": 316 + }, + { + "acc_char": 0.3038, + "acc_token": 0.6839, + "epoch": 0.01, + "loss_char": 0.7546, + "loss_token": 1.5604, + "lr": "2.00e-07", + "norm": 5.7545, + "step": 317 + }, + { + "acc_char": 0.3005, + "acc_token": 0.6839, + "epoch": 0.01, + "loss_char": 0.7142, + "loss_token": 1.6036, + "lr": "2.00e-07", + "norm": 5.8406, + "step": 318 + }, + { + "acc_char": 0.309, + "acc_token": 0.6981, + "epoch": 0.01, + "loss_char": 0.6622, + "loss_token": 1.5909, + "lr": "2.00e-07", + "norm": 5.6064, + "step": 319 + }, + { + "acc_char": 0.3106, + "acc_token": 0.6836, + "epoch": 0.01, + "loss_char": 0.7384, + "loss_token": 1.6081, + "lr": "2.00e-07", + "norm": 5.972, + "step": 320 + }, + { + "acc_char": 0.3107, + "acc_token": 0.6937, + "epoch": 0.01, + "loss_char": 0.6974, + "loss_token": 1.5829, + "lr": "2.00e-07", + "norm": 5.9956, + "step": 321 + }, + { + "acc_char": 0.2968, + "acc_token": 0.6709, + "epoch": 0.01, + "loss_char": 0.7633, + "loss_token": 1.606, + "lr": "2.00e-07", + "norm": 5.7139, + "step": 322 + }, + { + "acc_char": 0.3093, + "acc_token": 0.7082, + "epoch": 0.01, + "loss_char": 0.639, + "loss_token": 1.5356, + "lr": "2.00e-07", + "norm": 5.5638, + "step": 323 + }, + { + "acc_char": 0.3121, + "acc_token": 0.711, + "epoch": 0.01, + "loss_char": 0.6582, + "loss_token": 1.5754, + "lr": "2.00e-07", + "norm": 5.5749, + "step": 324 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6806, + "epoch": 0.01, + "loss_char": 0.7369, + "loss_token": 1.612, + "lr": "2.00e-07", + "norm": 5.7404, + "step": 325 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6974, + "epoch": 0.01, + "loss_char": 0.6704, + "loss_token": 1.5965, + "lr": "2.00e-07", + "norm": 5.8155, + "step": 326 + }, + { + "acc_char": 0.2937, + "acc_token": 0.6618, + "epoch": 0.01, + "loss_char": 0.7958, + "loss_token": 1.7527, + "lr": "2.00e-07", + "norm": 5.9642, + "step": 327 + }, + { + "acc_char": 0.3047, + "acc_token": 0.6887, + "epoch": 0.01, + "loss_char": 0.7168, + "loss_token": 1.6637, + "lr": "2.00e-07", + "norm": 5.7664, + "step": 328 + }, + { + "acc_char": 0.3076, + "acc_token": 0.6757, + "epoch": 0.01, + "loss_char": 0.7671, + "loss_token": 1.7099, + "lr": "2.00e-07", + "norm": 6.0867, + "step": 329 + }, + { + "acc_char": 0.3022, + "acc_token": 0.6847, + "epoch": 0.01, + "loss_char": 0.7003, + "loss_token": 1.5672, + "lr": "2.00e-07", + "norm": 5.9077, + "step": 330 + }, + { + "acc_char": 0.3147, + "acc_token": 0.7041, + "epoch": 0.01, + "loss_char": 0.6763, + "loss_token": 1.5775, + "lr": "2.00e-07", + "norm": 5.5642, + "step": 331 + }, + { + "acc_char": 0.3008, + "acc_token": 0.673, + "epoch": 0.01, + "loss_char": 0.7647, + "loss_token": 1.6656, + "lr": "2.00e-07", + "norm": 5.7092, + "step": 332 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6857, + "epoch": 0.01, + "loss_char": 0.6981, + "loss_token": 1.6254, + "lr": "2.00e-07", + "norm": 5.8577, + "step": 333 + }, + { + "acc_char": 0.2911, + "acc_token": 0.6324, + "epoch": 0.01, + "loss_char": 0.8438, + "loss_token": 1.6785, + "lr": "2.00e-07", + "norm": 5.2846, + "step": 334 + }, + { + "acc_char": 0.3018, + "acc_token": 0.6796, + "epoch": 0.01, + "loss_char": 0.748, + "loss_token": 1.6903, + "lr": "2.00e-07", + "norm": 6.3385, + "step": 335 + }, + { + "acc_char": 0.3005, + "acc_token": 0.6533, + "epoch": 0.01, + "loss_char": 0.7987, + "loss_token": 1.722, + "lr": "2.00e-07", + "norm": 5.7037, + "step": 336 + }, + { + "acc_char": 0.2927, + "acc_token": 0.6856, + "epoch": 0.01, + "loss_char": 0.676, + "loss_token": 1.6197, + "lr": "2.00e-07", + "norm": 5.504, + "step": 337 + }, + { + "acc_char": 0.2836, + "acc_token": 0.6138, + "epoch": 0.01, + "loss_char": 0.9242, + "loss_token": 1.892, + "lr": "2.00e-07", + "norm": 5.9519, + "step": 338 + }, + { + "acc_char": 0.3061, + "acc_token": 0.69, + "epoch": 0.01, + "loss_char": 0.7105, + "loss_token": 1.6633, + "lr": "2.00e-07", + "norm": 5.6888, + "step": 339 + }, + { + "acc_char": 0.3114, + "acc_token": 0.6959, + "epoch": 0.01, + "loss_char": 0.7106, + "loss_token": 1.6273, + "lr": "2.00e-07", + "norm": 5.9986, + "step": 340 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6855, + "epoch": 0.01, + "loss_char": 0.7016, + "loss_token": 1.6079, + "lr": "2.00e-07", + "norm": 5.857, + "step": 341 + }, + { + "acc_char": 0.2979, + "acc_token": 0.6707, + "epoch": 0.01, + "loss_char": 0.7816, + "loss_token": 1.7775, + "lr": "2.00e-07", + "norm": 6.0107, + "step": 342 + }, + { + "acc_char": 0.3073, + "acc_token": 0.6961, + "epoch": 0.01, + "loss_char": 0.6894, + "loss_token": 1.6348, + "lr": "2.00e-07", + "norm": 5.8313, + "step": 343 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6846, + "epoch": 0.01, + "loss_char": 0.7451, + "loss_token": 1.574, + "lr": "2.00e-07", + "norm": 5.7596, + "step": 344 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6724, + "epoch": 0.01, + "loss_char": 0.7413, + "loss_token": 1.6613, + "lr": "2.00e-07", + "norm": 5.8325, + "step": 345 + }, + { + "acc_char": 0.3101, + "acc_token": 0.6854, + "epoch": 0.01, + "loss_char": 0.743, + "loss_token": 1.608, + "lr": "2.00e-07", + "norm": 5.5323, + "step": 346 + }, + { + "acc_char": 0.2904, + "acc_token": 0.6635, + "epoch": 0.01, + "loss_char": 0.7893, + "loss_token": 1.7397, + "lr": "2.00e-07", + "norm": 6.1739, + "step": 347 + }, + { + "acc_char": 0.3111, + "acc_token": 0.6931, + "epoch": 0.01, + "loss_char": 0.709, + "loss_token": 1.6418, + "lr": "2.00e-07", + "norm": 5.9963, + "step": 348 + }, + { + "acc_char": 0.2909, + "acc_token": 0.6682, + "epoch": 0.01, + "loss_char": 0.7396, + "loss_token": 1.6422, + "lr": "2.00e-07", + "norm": 5.6683, + "step": 349 + }, + { + "acc_char": 0.3238, + "acc_token": 0.7197, + "epoch": 0.01, + "loss_char": 0.6568, + "loss_token": 1.5539, + "lr": "2.00e-07", + "norm": 5.7899, + "step": 350 + }, + { + "acc_char": 0.3086, + "acc_token": 0.6668, + "epoch": 0.01, + "loss_char": 0.7888, + "loss_token": 1.6842, + "lr": "2.00e-07", + "norm": 5.2554, + "step": 351 + }, + { + "acc_char": 0.3072, + "acc_token": 0.6981, + "epoch": 0.01, + "loss_char": 0.6763, + "loss_token": 1.5788, + "lr": "2.00e-07", + "norm": 5.5028, + "step": 352 + }, + { + "acc_char": 0.2979, + "acc_token": 0.6875, + "epoch": 0.01, + "loss_char": 0.6671, + "loss_token": 1.6384, + "lr": "2.00e-07", + "norm": 6.1584, + "step": 353 + }, + { + "acc_char": 0.2829, + "acc_token": 0.6175, + "epoch": 0.01, + "loss_char": 0.8946, + "loss_token": 1.8867, + "lr": "2.00e-07", + "norm": 5.5444, + "step": 354 + }, + { + "acc_char": 0.3063, + "acc_token": 0.7051, + "epoch": 0.01, + "loss_char": 0.6552, + "loss_token": 1.6427, + "lr": "2.00e-07", + "norm": 5.7578, + "step": 355 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6836, + "epoch": 0.01, + "loss_char": 0.7052, + "loss_token": 1.6508, + "lr": "2.00e-07", + "norm": 5.6257, + "step": 356 + }, + { + "acc_char": 0.284, + "acc_token": 0.6558, + "epoch": 0.01, + "loss_char": 0.7454, + "loss_token": 1.6895, + "lr": "2.00e-07", + "norm": 5.9138, + "step": 357 + }, + { + "acc_char": 0.2886, + "acc_token": 0.6673, + "epoch": 0.01, + "loss_char": 0.7445, + "loss_token": 1.6311, + "lr": "2.00e-07", + "norm": 5.7174, + "step": 358 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6745, + "epoch": 0.01, + "loss_char": 0.7696, + "loss_token": 1.666, + "lr": "2.00e-07", + "norm": 5.9078, + "step": 359 + }, + { + "acc_char": 0.2994, + "acc_token": 0.6862, + "epoch": 0.01, + "loss_char": 0.7015, + "loss_token": 1.5945, + "lr": "2.00e-07", + "norm": 5.9093, + "step": 360 + }, + { + "acc_char": 0.3052, + "acc_token": 0.6858, + "epoch": 0.01, + "loss_char": 0.731, + "loss_token": 1.6641, + "lr": "2.00e-07", + "norm": 5.9643, + "step": 361 + }, + { + "acc_char": 0.3113, + "acc_token": 0.7002, + "epoch": 0.01, + "loss_char": 0.6977, + "loss_token": 1.5529, + "lr": "2.00e-07", + "norm": 5.548, + "step": 362 + }, + { + "acc_char": 0.2994, + "acc_token": 0.6823, + "epoch": 0.01, + "loss_char": 0.7226, + "loss_token": 1.6606, + "lr": "2.00e-07", + "norm": 5.6489, + "step": 363 + }, + { + "acc_char": 0.3144, + "acc_token": 0.6987, + "epoch": 0.01, + "loss_char": 0.7038, + "loss_token": 1.6622, + "lr": "2.00e-07", + "norm": 5.6968, + "step": 364 + }, + { + "acc_char": 0.3049, + "acc_token": 0.6923, + "epoch": 0.01, + "loss_char": 0.6968, + "loss_token": 1.5419, + "lr": "2.00e-07", + "norm": 5.4769, + "step": 365 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6884, + "epoch": 0.01, + "loss_char": 0.6917, + "loss_token": 1.5916, + "lr": "2.00e-07", + "norm": 5.8595, + "step": 366 + }, + { + "acc_char": 0.2899, + "acc_token": 0.6622, + "epoch": 0.01, + "loss_char": 0.774, + "loss_token": 1.6747, + "lr": "2.00e-07", + "norm": 6.1018, + "step": 367 + }, + { + "acc_char": 0.2971, + "acc_token": 0.6756, + "epoch": 0.01, + "loss_char": 0.733, + "loss_token": 1.6098, + "lr": "2.00e-07", + "norm": 5.7129, + "step": 368 + }, + { + "acc_char": 0.3075, + "acc_token": 0.6922, + "epoch": 0.01, + "loss_char": 0.6952, + "loss_token": 1.628, + "lr": "2.00e-07", + "norm": 5.8883, + "step": 369 + }, + { + "acc_char": 0.3026, + "acc_token": 0.7124, + "epoch": 0.01, + "loss_char": 0.6241, + "loss_token": 1.5773, + "lr": "2.00e-07", + "norm": 5.5128, + "step": 370 + }, + { + "acc_char": 0.299, + "acc_token": 0.6668, + "epoch": 0.01, + "loss_char": 0.784, + "loss_token": 1.6683, + "lr": "2.00e-07", + "norm": 5.9957, + "step": 371 + }, + { + "acc_char": 0.2926, + "acc_token": 0.6731, + "epoch": 0.01, + "loss_char": 0.735, + "loss_token": 1.562, + "lr": "2.00e-07", + "norm": 5.4053, + "step": 372 + }, + { + "acc_char": 0.2935, + "acc_token": 0.6759, + "epoch": 0.01, + "loss_char": 0.7445, + "loss_token": 1.7457, + "lr": "2.00e-07", + "norm": 5.9811, + "step": 373 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6718, + "epoch": 0.01, + "loss_char": 0.7618, + "loss_token": 1.6714, + "lr": "2.00e-07", + "norm": 5.7125, + "step": 374 + }, + { + "acc_char": 0.3105, + "acc_token": 0.6914, + "epoch": 0.01, + "loss_char": 0.7096, + "loss_token": 1.6491, + "lr": "2.00e-07", + "norm": 5.7288, + "step": 375 + }, + { + "acc_char": 0.3093, + "acc_token": 0.7048, + "epoch": 0.01, + "loss_char": 0.6826, + "loss_token": 1.6267, + "lr": "2.00e-07", + "norm": 5.7194, + "step": 376 + }, + { + "acc_char": 0.3067, + "acc_token": 0.6888, + "epoch": 0.01, + "loss_char": 0.6965, + "loss_token": 1.6184, + "lr": "2.00e-07", + "norm": 5.7391, + "step": 377 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6721, + "epoch": 0.01, + "loss_char": 0.7448, + "loss_token": 1.5463, + "lr": "2.00e-07", + "norm": 5.3704, + "step": 378 + }, + { + "acc_char": 0.3098, + "acc_token": 0.6886, + "epoch": 0.01, + "loss_char": 0.7316, + "loss_token": 1.6599, + "lr": "2.00e-07", + "norm": 5.8194, + "step": 379 + }, + { + "acc_char": 0.2925, + "acc_token": 0.6664, + "epoch": 0.01, + "loss_char": 0.7882, + "loss_token": 1.8153, + "lr": "2.00e-07", + "norm": 5.9483, + "step": 380 + }, + { + "acc_char": 0.2972, + "acc_token": 0.6659, + "epoch": 0.01, + "loss_char": 0.7625, + "loss_token": 1.6956, + "lr": "2.00e-07", + "norm": 5.8517, + "step": 381 + }, + { + "acc_char": 0.3071, + "acc_token": 0.6989, + "epoch": 0.01, + "loss_char": 0.6656, + "loss_token": 1.6748, + "lr": "2.00e-07", + "norm": 5.7486, + "step": 382 + }, + { + "acc_char": 0.3091, + "acc_token": 0.7008, + "epoch": 0.01, + "loss_char": 0.6794, + "loss_token": 1.5323, + "lr": "2.00e-07", + "norm": 5.2689, + "step": 383 + }, + { + "acc_char": 0.3172, + "acc_token": 0.7194, + "epoch": 0.01, + "loss_char": 0.6259, + "loss_token": 1.4796, + "lr": "2.00e-07", + "norm": 5.2914, + "step": 384 + }, + { + "acc_char": 0.3008, + "acc_token": 0.6811, + "epoch": 0.01, + "loss_char": 0.7145, + "loss_token": 1.6898, + "lr": "2.00e-07", + "norm": 5.7635, + "step": 385 + }, + { + "acc_char": 0.3045, + "acc_token": 0.6842, + "epoch": 0.01, + "loss_char": 0.7293, + "loss_token": 1.6527, + "lr": "2.00e-07", + "norm": 6.0697, + "step": 386 + }, + { + "acc_char": 0.2971, + "acc_token": 0.6735, + "epoch": 0.01, + "loss_char": 0.7487, + "loss_token": 1.6422, + "lr": "2.00e-07", + "norm": 5.7503, + "step": 387 + }, + { + "acc_char": 0.2948, + "acc_token": 0.6757, + "epoch": 0.01, + "loss_char": 0.7323, + "loss_token": 1.6915, + "lr": "2.00e-07", + "norm": 5.9776, + "step": 388 + }, + { + "acc_char": 0.307, + "acc_token": 0.6914, + "epoch": 0.01, + "loss_char": 0.7019, + "loss_token": 1.5222, + "lr": "2.00e-07", + "norm": 5.7667, + "step": 389 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6679, + "epoch": 0.01, + "loss_char": 0.7908, + "loss_token": 1.7331, + "lr": "2.00e-07", + "norm": 6.0986, + "step": 390 + }, + { + "acc_char": 0.2952, + "acc_token": 0.6688, + "epoch": 0.01, + "loss_char": 0.7422, + "loss_token": 1.6141, + "lr": "2.00e-07", + "norm": 5.766, + "step": 391 + }, + { + "acc_char": 0.2958, + "acc_token": 0.6717, + "epoch": 0.01, + "loss_char": 0.7446, + "loss_token": 1.6848, + "lr": "2.00e-07", + "norm": 5.7914, + "step": 392 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6725, + "epoch": 0.01, + "loss_char": 0.7515, + "loss_token": 1.6382, + "lr": "2.00e-07", + "norm": 5.5648, + "step": 393 + }, + { + "acc_char": 0.3051, + "acc_token": 0.6734, + "epoch": 0.01, + "loss_char": 0.7527, + "loss_token": 1.6441, + "lr": "2.00e-07", + "norm": 5.8132, + "step": 394 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6883, + "epoch": 0.01, + "loss_char": 0.6923, + "loss_token": 1.6338, + "lr": "2.00e-07", + "norm": 5.6723, + "step": 395 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6824, + "epoch": 0.01, + "loss_char": 0.7433, + "loss_token": 1.6936, + "lr": "2.00e-07", + "norm": 5.8343, + "step": 396 + }, + { + "acc_char": 0.3058, + "acc_token": 0.6873, + "epoch": 0.01, + "loss_char": 0.7231, + "loss_token": 1.6963, + "lr": "2.00e-07", + "norm": 6.3947, + "step": 397 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6678, + "epoch": 0.01, + "loss_char": 0.7728, + "loss_token": 1.5994, + "lr": "2.00e-07", + "norm": 5.501, + "step": 398 + }, + { + "acc_char": 0.3092, + "acc_token": 0.7044, + "epoch": 0.01, + "loss_char": 0.6667, + "loss_token": 1.6234, + "lr": "2.00e-07", + "norm": 5.8259, + "step": 399 + }, + { + "acc_char": 0.3093, + "acc_token": 0.6906, + "epoch": 0.01, + "loss_char": 0.7331, + "loss_token": 1.604, + "lr": "2.00e-07", + "norm": 5.7938, + "step": 400 + }, + { + "acc_char": 0.2956, + "acc_token": 0.6737, + "epoch": 0.01, + "loss_char": 0.7542, + "loss_token": 1.6651, + "lr": "2.00e-07", + "norm": 5.6362, + "step": 401 + }, + { + "acc_char": 0.2992, + "acc_token": 0.6798, + "epoch": 0.01, + "loss_char": 0.757, + "loss_token": 1.6434, + "lr": "2.00e-07", + "norm": 5.7258, + "step": 402 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6818, + "epoch": 0.01, + "loss_char": 0.7265, + "loss_token": 1.6712, + "lr": "2.00e-07", + "norm": 5.5462, + "step": 403 + }, + { + "acc_char": 0.3177, + "acc_token": 0.7261, + "epoch": 0.01, + "loss_char": 0.6203, + "loss_token": 1.4859, + "lr": "2.00e-07", + "norm": 5.4174, + "step": 404 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6801, + "epoch": 0.01, + "loss_char": 0.7286, + "loss_token": 1.6256, + "lr": "2.00e-07", + "norm": 5.3541, + "step": 405 + }, + { + "acc_char": 0.3148, + "acc_token": 0.7173, + "epoch": 0.01, + "loss_char": 0.6452, + "loss_token": 1.5515, + "lr": "2.00e-07", + "norm": 5.4521, + "step": 406 + }, + { + "acc_char": 0.3041, + "acc_token": 0.6856, + "epoch": 0.01, + "loss_char": 0.7246, + "loss_token": 1.6415, + "lr": "2.00e-07", + "norm": 5.712, + "step": 407 + }, + { + "acc_char": 0.3038, + "acc_token": 0.693, + "epoch": 0.01, + "loss_char": 0.7085, + "loss_token": 1.6706, + "lr": "2.00e-07", + "norm": 5.8686, + "step": 408 + }, + { + "acc_char": 0.281, + "acc_token": 0.6554, + "epoch": 0.01, + "loss_char": 0.7944, + "loss_token": 1.728, + "lr": "2.00e-07", + "norm": 5.8449, + "step": 409 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6939, + "epoch": 0.01, + "loss_char": 0.6905, + "loss_token": 1.6071, + "lr": "2.00e-07", + "norm": 5.521, + "step": 410 + }, + { + "acc_char": 0.3061, + "acc_token": 0.6905, + "epoch": 0.01, + "loss_char": 0.7053, + "loss_token": 1.6249, + "lr": "2.00e-07", + "norm": 5.6452, + "step": 411 + }, + { + "acc_char": 0.3026, + "acc_token": 0.6691, + "epoch": 0.01, + "loss_char": 0.7783, + "loss_token": 1.66, + "lr": "2.00e-07", + "norm": 5.9179, + "step": 412 + }, + { + "acc_char": 0.309, + "acc_token": 0.705, + "epoch": 0.01, + "loss_char": 0.6664, + "loss_token": 1.5434, + "lr": "2.00e-07", + "norm": 5.302, + "step": 413 + }, + { + "acc_char": 0.3089, + "acc_token": 0.6967, + "epoch": 0.01, + "loss_char": 0.6844, + "loss_token": 1.5804, + "lr": "2.00e-07", + "norm": 5.8454, + "step": 414 + }, + { + "acc_char": 0.2907, + "acc_token": 0.6686, + "epoch": 0.01, + "loss_char": 0.7367, + "loss_token": 1.6778, + "lr": "2.00e-07", + "norm": 5.7327, + "step": 415 + }, + { + "acc_char": 0.2921, + "acc_token": 0.6253, + "epoch": 0.01, + "loss_char": 0.8878, + "loss_token": 1.7222, + "lr": "2.00e-07", + "norm": 5.5795, + "step": 416 + }, + { + "acc_char": 0.2962, + "acc_token": 0.6945, + "epoch": 0.01, + "loss_char": 0.6684, + "loss_token": 1.5484, + "lr": "2.00e-07", + "norm": 5.6565, + "step": 417 + }, + { + "acc_char": 0.2975, + "acc_token": 0.6701, + "epoch": 0.01, + "loss_char": 0.7654, + "loss_token": 1.6914, + "lr": "2.00e-07", + "norm": 5.5792, + "step": 418 + }, + { + "acc_char": 0.3157, + "acc_token": 0.7062, + "epoch": 0.01, + "loss_char": 0.6615, + "loss_token": 1.6023, + "lr": "2.00e-07", + "norm": 5.8135, + "step": 419 + }, + { + "acc_char": 0.3077, + "acc_token": 0.693, + "epoch": 0.01, + "loss_char": 0.6872, + "loss_token": 1.5777, + "lr": "2.00e-07", + "norm": 5.8938, + "step": 420 + }, + { + "acc_char": 0.2868, + "acc_token": 0.6238, + "epoch": 0.01, + "loss_char": 0.8702, + "loss_token": 1.664, + "lr": "2.00e-07", + "norm": 5.3468, + "step": 421 + }, + { + "acc_char": 0.3136, + "acc_token": 0.7069, + "epoch": 0.01, + "loss_char": 0.6709, + "loss_token": 1.5435, + "lr": "2.00e-07", + "norm": 5.5504, + "step": 422 + }, + { + "acc_char": 0.299, + "acc_token": 0.6801, + "epoch": 0.01, + "loss_char": 0.7479, + "loss_token": 1.6409, + "lr": "2.00e-07", + "norm": 6.0676, + "step": 423 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6743, + "epoch": 0.01, + "loss_char": 0.7493, + "loss_token": 1.5541, + "lr": "2.00e-07", + "norm": 5.6725, + "step": 424 + }, + { + "acc_char": 0.3095, + "acc_token": 0.7043, + "epoch": 0.01, + "loss_char": 0.6468, + "loss_token": 1.5845, + "lr": "2.00e-07", + "norm": 5.718, + "step": 425 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6742, + "epoch": 0.01, + "loss_char": 0.7576, + "loss_token": 1.6773, + "lr": "2.00e-07", + "norm": 5.778, + "step": 426 + }, + { + "acc_char": 0.2981, + "acc_token": 0.674, + "epoch": 0.01, + "loss_char": 0.7737, + "loss_token": 1.7231, + "lr": "2.00e-07", + "norm": 5.9369, + "step": 427 + }, + { + "acc_char": 0.3146, + "acc_token": 0.7153, + "epoch": 0.01, + "loss_char": 0.6293, + "loss_token": 1.5328, + "lr": "2.00e-07", + "norm": 5.5686, + "step": 428 + }, + { + "acc_char": 0.3063, + "acc_token": 0.6914, + "epoch": 0.01, + "loss_char": 0.7156, + "loss_token": 1.5677, + "lr": "2.00e-07", + "norm": 5.7421, + "step": 429 + }, + { + "acc_char": 0.2992, + "acc_token": 0.6773, + "epoch": 0.01, + "loss_char": 0.7398, + "loss_token": 1.7212, + "lr": "2.00e-07", + "norm": 5.7725, + "step": 430 + }, + { + "acc_char": 0.2965, + "acc_token": 0.669, + "epoch": 0.01, + "loss_char": 0.7813, + "loss_token": 1.7132, + "lr": "2.00e-07", + "norm": 5.7314, + "step": 431 + }, + { + "acc_char": 0.3065, + "acc_token": 0.6996, + "epoch": 0.01, + "loss_char": 0.6745, + "loss_token": 1.5433, + "lr": "2.00e-07", + "norm": 5.6082, + "step": 432 + }, + { + "acc_char": 0.3203, + "acc_token": 0.7247, + "epoch": 0.01, + "loss_char": 0.6148, + "loss_token": 1.4884, + "lr": "2.00e-07", + "norm": 5.4403, + "step": 433 + }, + { + "acc_char": 0.3058, + "acc_token": 0.699, + "epoch": 0.01, + "loss_char": 0.6423, + "loss_token": 1.5294, + "lr": "2.00e-07", + "norm": 5.5608, + "step": 434 + }, + { + "acc_char": 0.2914, + "acc_token": 0.6616, + "epoch": 0.01, + "loss_char": 0.7899, + "loss_token": 1.7818, + "lr": "2.00e-07", + "norm": 6.3515, + "step": 435 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6781, + "epoch": 0.01, + "loss_char": 0.7233, + "loss_token": 1.643, + "lr": "2.00e-07", + "norm": 5.7809, + "step": 436 + }, + { + "acc_char": 0.3071, + "acc_token": 0.6873, + "epoch": 0.01, + "loss_char": 0.7219, + "loss_token": 1.6312, + "lr": "2.00e-07", + "norm": 5.8777, + "step": 437 + }, + { + "acc_char": 0.293, + "acc_token": 0.6592, + "epoch": 0.01, + "loss_char": 0.7718, + "loss_token": 1.7166, + "lr": "2.00e-07", + "norm": 6.0267, + "step": 438 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6753, + "epoch": 0.01, + "loss_char": 0.75, + "loss_token": 1.6695, + "lr": "2.00e-07", + "norm": 5.7281, + "step": 439 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6798, + "epoch": 0.01, + "loss_char": 0.7365, + "loss_token": 1.6402, + "lr": "2.00e-07", + "norm": 5.9977, + "step": 440 + }, + { + "acc_char": 0.296, + "acc_token": 0.6709, + "epoch": 0.01, + "loss_char": 0.7499, + "loss_token": 1.6319, + "lr": "2.00e-07", + "norm": 5.8092, + "step": 441 + }, + { + "acc_char": 0.303, + "acc_token": 0.6867, + "epoch": 0.01, + "loss_char": 0.696, + "loss_token": 1.535, + "lr": "2.00e-07", + "norm": 5.5169, + "step": 442 + }, + { + "acc_char": 0.3017, + "acc_token": 0.685, + "epoch": 0.01, + "loss_char": 0.7092, + "loss_token": 1.6507, + "lr": "2.00e-07", + "norm": 6.0394, + "step": 443 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6729, + "epoch": 0.01, + "loss_char": 0.7755, + "loss_token": 1.6699, + "lr": "2.00e-07", + "norm": 6.0785, + "step": 444 + }, + { + "acc_char": 0.2947, + "acc_token": 0.6726, + "epoch": 0.01, + "loss_char": 0.745, + "loss_token": 1.665, + "lr": "2.00e-07", + "norm": 5.4631, + "step": 445 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6881, + "epoch": 0.01, + "loss_char": 0.6957, + "loss_token": 1.5646, + "lr": "2.00e-07", + "norm": 6.0473, + "step": 446 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6825, + "epoch": 0.01, + "loss_char": 0.7218, + "loss_token": 1.6613, + "lr": "2.00e-07", + "norm": 5.7581, + "step": 447 + }, + { + "acc_char": 0.3038, + "acc_token": 0.6971, + "epoch": 0.01, + "loss_char": 0.6581, + "loss_token": 1.5529, + "lr": "2.00e-07", + "norm": 5.6224, + "step": 448 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6864, + "epoch": 0.01, + "loss_char": 0.7059, + "loss_token": 1.6413, + "lr": "2.00e-07", + "norm": 5.9119, + "step": 449 + }, + { + "acc_char": 0.3022, + "acc_token": 0.692, + "epoch": 0.01, + "loss_char": 0.6988, + "loss_token": 1.615, + "lr": "2.00e-07", + "norm": 5.6076, + "step": 450 + }, + { + "acc_char": 0.3073, + "acc_token": 0.6971, + "epoch": 0.01, + "loss_char": 0.6754, + "loss_token": 1.6428, + "lr": "2.00e-07", + "norm": 5.8904, + "step": 451 + }, + { + "acc_char": 0.3064, + "acc_token": 0.6801, + "epoch": 0.01, + "loss_char": 0.7328, + "loss_token": 1.631, + "lr": "2.00e-07", + "norm": 5.8551, + "step": 452 + }, + { + "acc_char": 0.3071, + "acc_token": 0.6928, + "epoch": 0.01, + "loss_char": 0.6906, + "loss_token": 1.5022, + "lr": "2.00e-07", + "norm": 5.4729, + "step": 453 + }, + { + "acc_char": 0.3208, + "acc_token": 0.7102, + "epoch": 0.01, + "loss_char": 0.6756, + "loss_token": 1.5461, + "lr": "2.00e-07", + "norm": 5.6754, + "step": 454 + }, + { + "acc_char": 0.3092, + "acc_token": 0.6937, + "epoch": 0.01, + "loss_char": 0.7156, + "loss_token": 1.6372, + "lr": "2.00e-07", + "norm": 6.0278, + "step": 455 + }, + { + "acc_char": 0.3199, + "acc_token": 0.7228, + "epoch": 0.01, + "loss_char": 0.6196, + "loss_token": 1.5193, + "lr": "2.00e-07", + "norm": 5.64, + "step": 456 + }, + { + "acc_char": 0.2966, + "acc_token": 0.6698, + "epoch": 0.01, + "loss_char": 0.7384, + "loss_token": 1.6731, + "lr": "2.00e-07", + "norm": 5.6788, + "step": 457 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6484, + "epoch": 0.01, + "loss_char": 0.8133, + "loss_token": 1.6824, + "lr": "2.00e-07", + "norm": 5.3832, + "step": 458 + }, + { + "acc_char": 0.2934, + "acc_token": 0.6721, + "epoch": 0.01, + "loss_char": 0.7316, + "loss_token": 1.729, + "lr": "2.00e-07", + "norm": 5.854, + "step": 459 + }, + { + "acc_char": 0.2848, + "acc_token": 0.619, + "epoch": 0.01, + "loss_char": 0.8858, + "loss_token": 1.7516, + "lr": "2.00e-07", + "norm": 5.5137, + "step": 460 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6674, + "epoch": 0.01, + "loss_char": 0.7782, + "loss_token": 1.6639, + "lr": "2.00e-07", + "norm": 5.8666, + "step": 461 + }, + { + "acc_char": 0.3221, + "acc_token": 0.7197, + "epoch": 0.01, + "loss_char": 0.6214, + "loss_token": 1.4808, + "lr": "2.00e-07", + "norm": 5.3481, + "step": 462 + }, + { + "acc_char": 0.3061, + "acc_token": 0.6853, + "epoch": 0.01, + "loss_char": 0.7414, + "loss_token": 1.57, + "lr": "2.00e-07", + "norm": 5.7585, + "step": 463 + }, + { + "acc_char": 0.314, + "acc_token": 0.7022, + "epoch": 0.01, + "loss_char": 0.6737, + "loss_token": 1.5346, + "lr": "2.00e-07", + "norm": 5.5364, + "step": 464 + }, + { + "acc_char": 0.3145, + "acc_token": 0.7132, + "epoch": 0.01, + "loss_char": 0.6335, + "loss_token": 1.471, + "lr": "2.00e-07", + "norm": 5.0732, + "step": 465 + }, + { + "acc_char": 0.31, + "acc_token": 0.6899, + "epoch": 0.01, + "loss_char": 0.7093, + "loss_token": 1.6284, + "lr": "2.00e-07", + "norm": 5.6945, + "step": 466 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6829, + "epoch": 0.01, + "loss_char": 0.7209, + "loss_token": 1.6079, + "lr": "2.00e-07", + "norm": 5.8328, + "step": 467 + }, + { + "acc_char": 0.2971, + "acc_token": 0.6809, + "epoch": 0.01, + "loss_char": 0.7248, + "loss_token": 1.6548, + "lr": "2.00e-07", + "norm": 5.8493, + "step": 468 + }, + { + "acc_char": 0.309, + "acc_token": 0.7055, + "epoch": 0.01, + "loss_char": 0.6652, + "loss_token": 1.552, + "lr": "2.00e-07", + "norm": 5.7961, + "step": 469 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6878, + "epoch": 0.01, + "loss_char": 0.6956, + "loss_token": 1.6944, + "lr": "2.00e-07", + "norm": 5.8907, + "step": 470 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6759, + "epoch": 0.01, + "loss_char": 0.7354, + "loss_token": 1.5767, + "lr": "2.00e-07", + "norm": 5.6017, + "step": 471 + }, + { + "acc_char": 0.2985, + "acc_token": 0.6779, + "epoch": 0.01, + "loss_char": 0.7313, + "loss_token": 1.6971, + "lr": "2.00e-07", + "norm": 5.7723, + "step": 472 + }, + { + "acc_char": 0.2867, + "acc_token": 0.6601, + "epoch": 0.01, + "loss_char": 0.7551, + "loss_token": 1.709, + "lr": "2.00e-07", + "norm": 5.6993, + "step": 473 + }, + { + "acc_char": 0.279, + "acc_token": 0.6141, + "epoch": 0.01, + "loss_char": 0.9209, + "loss_token": 1.8436, + "lr": "2.00e-07", + "norm": 5.3151, + "step": 474 + }, + { + "acc_char": 0.3006, + "acc_token": 0.6697, + "epoch": 0.01, + "loss_char": 0.7635, + "loss_token": 1.6591, + "lr": "2.00e-07", + "norm": 5.8138, + "step": 475 + }, + { + "acc_char": 0.3053, + "acc_token": 0.6845, + "epoch": 0.01, + "loss_char": 0.7014, + "loss_token": 1.5667, + "lr": "2.00e-07", + "norm": 5.6497, + "step": 476 + }, + { + "acc_char": 0.3063, + "acc_token": 0.6823, + "epoch": 0.01, + "loss_char": 0.7325, + "loss_token": 1.6252, + "lr": "2.00e-07", + "norm": 5.7174, + "step": 477 + }, + { + "acc_char": 0.3232, + "acc_token": 0.7062, + "epoch": 0.01, + "loss_char": 0.7192, + "loss_token": 1.6411, + "lr": "2.00e-07", + "norm": 5.4237, + "step": 478 + }, + { + "acc_char": 0.3098, + "acc_token": 0.6917, + "epoch": 0.01, + "loss_char": 0.708, + "loss_token": 1.6068, + "lr": "2.00e-07", + "norm": 5.5393, + "step": 479 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6798, + "epoch": 0.01, + "loss_char": 0.6922, + "loss_token": 1.4888, + "lr": "2.00e-07", + "norm": 5.7247, + "step": 480 + }, + { + "acc_char": 0.3042, + "acc_token": 0.6869, + "epoch": 0.01, + "loss_char": 0.7076, + "loss_token": 1.6158, + "lr": "2.00e-07", + "norm": 5.9854, + "step": 481 + }, + { + "acc_char": 0.3018, + "acc_token": 0.6875, + "epoch": 0.01, + "loss_char": 0.6984, + "loss_token": 1.6211, + "lr": "2.00e-07", + "norm": 5.8652, + "step": 482 + }, + { + "acc_char": 0.3003, + "acc_token": 0.68, + "epoch": 0.01, + "loss_char": 0.7306, + "loss_token": 1.6727, + "lr": "2.00e-07", + "norm": 5.6525, + "step": 483 + }, + { + "acc_char": 0.3038, + "acc_token": 0.6767, + "epoch": 0.01, + "loss_char": 0.7567, + "loss_token": 1.6292, + "lr": "2.00e-07", + "norm": 5.5359, + "step": 484 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6953, + "epoch": 0.01, + "loss_char": 0.6971, + "loss_token": 1.5871, + "lr": "2.00e-07", + "norm": 5.9003, + "step": 485 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6804, + "epoch": 0.01, + "loss_char": 0.7237, + "loss_token": 1.6143, + "lr": "2.00e-07", + "norm": 5.6656, + "step": 486 + }, + { + "acc_char": 0.2974, + "acc_token": 0.6946, + "epoch": 0.01, + "loss_char": 0.6534, + "loss_token": 1.6319, + "lr": "2.00e-07", + "norm": 5.7585, + "step": 487 + }, + { + "acc_char": 0.2946, + "acc_token": 0.6734, + "epoch": 0.01, + "loss_char": 0.7431, + "loss_token": 1.6914, + "lr": "2.00e-07", + "norm": 5.6418, + "step": 488 + }, + { + "acc_char": 0.3119, + "acc_token": 0.7011, + "epoch": 0.01, + "loss_char": 0.6807, + "loss_token": 1.6073, + "lr": "2.00e-07", + "norm": 5.6436, + "step": 489 + }, + { + "acc_char": 0.2839, + "acc_token": 0.6155, + "epoch": 0.01, + "loss_char": 0.9091, + "loss_token": 1.8123, + "lr": "2.00e-07", + "norm": 5.6981, + "step": 490 + }, + { + "acc_char": 0.2984, + "acc_token": 0.6703, + "epoch": 0.01, + "loss_char": 0.7578, + "loss_token": 1.5599, + "lr": "2.00e-07", + "norm": 5.6947, + "step": 491 + }, + { + "acc_char": 0.3065, + "acc_token": 0.6484, + "epoch": 0.01, + "loss_char": 0.8497, + "loss_token": 1.6805, + "lr": "2.00e-07", + "norm": 5.4046, + "step": 492 + }, + { + "acc_char": 0.2993, + "acc_token": 0.679, + "epoch": 0.01, + "loss_char": 0.7339, + "loss_token": 1.6077, + "lr": "2.00e-07", + "norm": 5.4286, + "step": 493 + }, + { + "acc_char": 0.2944, + "acc_token": 0.6653, + "epoch": 0.01, + "loss_char": 0.7696, + "loss_token": 1.7296, + "lr": "2.00e-07", + "norm": 5.8803, + "step": 494 + }, + { + "acc_char": 0.3013, + "acc_token": 0.6859, + "epoch": 0.01, + "loss_char": 0.7333, + "loss_token": 1.6756, + "lr": "2.00e-07", + "norm": 5.6646, + "step": 495 + }, + { + "acc_char": 0.2916, + "acc_token": 0.6663, + "epoch": 0.01, + "loss_char": 0.7465, + "loss_token": 1.7513, + "lr": "2.00e-07", + "norm": 5.9079, + "step": 496 + }, + { + "acc_char": 0.3041, + "acc_token": 0.6897, + "epoch": 0.01, + "loss_char": 0.7028, + "loss_token": 1.5441, + "lr": "2.00e-07", + "norm": 5.4711, + "step": 497 + }, + { + "acc_char": 0.298, + "acc_token": 0.6945, + "epoch": 0.01, + "loss_char": 0.6889, + "loss_token": 1.5937, + "lr": "2.00e-07", + "norm": 5.9388, + "step": 498 + }, + { + "acc_char": 0.3092, + "acc_token": 0.6973, + "epoch": 0.01, + "loss_char": 0.6963, + "loss_token": 1.5762, + "lr": "2.00e-07", + "norm": 5.4772, + "step": 499 + }, + { + "acc_char": 0.297, + "acc_token": 0.6663, + "epoch": 0.01, + "loss_char": 0.7789, + "loss_token": 1.6915, + "lr": "2.00e-07", + "norm": 5.8044, + "step": 500 + }, + { + "acc_char": 0.3078, + "acc_token": 0.6932, + "epoch": 0.01, + "loss_char": 0.7231, + "loss_token": 1.5793, + "lr": "2.00e-07", + "norm": 5.6492, + "step": 501 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6777, + "epoch": 0.01, + "loss_char": 0.7369, + "loss_token": 1.6032, + "lr": "2.00e-07", + "norm": 5.6355, + "step": 502 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6943, + "epoch": 0.01, + "loss_char": 0.6721, + "loss_token": 1.6407, + "lr": "2.00e-07", + "norm": 5.3251, + "step": 503 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6851, + "epoch": 0.01, + "loss_char": 0.7497, + "loss_token": 1.67, + "lr": "2.00e-07", + "norm": 5.6149, + "step": 504 + }, + { + "acc_char": 0.3165, + "acc_token": 0.7085, + "epoch": 0.01, + "loss_char": 0.6598, + "loss_token": 1.5165, + "lr": "2.00e-07", + "norm": 5.5233, + "step": 505 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6898, + "epoch": 0.01, + "loss_char": 0.6862, + "loss_token": 1.6602, + "lr": "2.00e-07", + "norm": 5.7605, + "step": 506 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6698, + "epoch": 0.01, + "loss_char": 0.7666, + "loss_token": 1.7187, + "lr": "2.00e-07", + "norm": 6.0065, + "step": 507 + }, + { + "acc_char": 0.2914, + "acc_token": 0.6369, + "epoch": 0.01, + "loss_char": 0.8238, + "loss_token": 1.7726, + "lr": "2.00e-07", + "norm": 5.4933, + "step": 508 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6841, + "epoch": 0.01, + "loss_char": 0.7153, + "loss_token": 1.6288, + "lr": "2.00e-07", + "norm": 5.4807, + "step": 509 + }, + { + "acc_char": 0.2938, + "acc_token": 0.6602, + "epoch": 0.01, + "loss_char": 0.7827, + "loss_token": 1.7307, + "lr": "2.00e-07", + "norm": 5.9197, + "step": 510 + }, + { + "acc_char": 0.297, + "acc_token": 0.6911, + "epoch": 0.01, + "loss_char": 0.6722, + "loss_token": 1.5813, + "lr": "2.00e-07", + "norm": 5.7085, + "step": 511 + }, + { + "acc_char": 0.3012, + "acc_token": 0.6763, + "epoch": 0.01, + "loss_char": 0.7463, + "loss_token": 1.6278, + "lr": "2.00e-07", + "norm": 5.6667, + "step": 512 + }, + { + "acc_char": 0.3061, + "acc_token": 0.682, + "epoch": 0.01, + "loss_char": 0.7469, + "loss_token": 1.6617, + "lr": "2.00e-07", + "norm": 5.6931, + "step": 513 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6861, + "epoch": 0.01, + "loss_char": 0.7019, + "loss_token": 1.4865, + "lr": "2.00e-07", + "norm": 5.4473, + "step": 514 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6729, + "epoch": 0.01, + "loss_char": 0.7615, + "loss_token": 1.6468, + "lr": "2.00e-07", + "norm": 5.6802, + "step": 515 + }, + { + "acc_char": 0.2938, + "acc_token": 0.6499, + "epoch": 0.01, + "loss_char": 0.8332, + "loss_token": 1.8228, + "lr": "2.00e-07", + "norm": 6.4352, + "step": 516 + }, + { + "acc_char": 0.3015, + "acc_token": 0.675, + "epoch": 0.01, + "loss_char": 0.7496, + "loss_token": 1.6712, + "lr": "2.00e-07", + "norm": 6.3257, + "step": 517 + }, + { + "acc_char": 0.2868, + "acc_token": 0.6577, + "epoch": 0.01, + "loss_char": 0.8048, + "loss_token": 1.7543, + "lr": "2.00e-07", + "norm": 5.8648, + "step": 518 + }, + { + "acc_char": 0.3102, + "acc_token": 0.6952, + "epoch": 0.01, + "loss_char": 0.7037, + "loss_token": 1.6965, + "lr": "2.00e-07", + "norm": 6.092, + "step": 519 + }, + { + "acc_char": 0.3043, + "acc_token": 0.6834, + "epoch": 0.01, + "loss_char": 0.7227, + "loss_token": 1.6312, + "lr": "2.00e-07", + "norm": 5.761, + "step": 520 + }, + { + "acc_char": 0.2938, + "acc_token": 0.6543, + "epoch": 0.01, + "loss_char": 0.8214, + "loss_token": 1.7083, + "lr": "2.00e-07", + "norm": 5.9636, + "step": 521 + }, + { + "acc_char": 0.3135, + "acc_token": 0.7087, + "epoch": 0.01, + "loss_char": 0.6632, + "loss_token": 1.6053, + "lr": "2.00e-07", + "norm": 5.7167, + "step": 522 + }, + { + "acc_char": 0.309, + "acc_token": 0.6883, + "epoch": 0.01, + "loss_char": 0.7156, + "loss_token": 1.6225, + "lr": "2.00e-07", + "norm": 5.7743, + "step": 523 + }, + { + "acc_char": 0.2965, + "acc_token": 0.6721, + "epoch": 0.01, + "loss_char": 0.7391, + "loss_token": 1.7042, + "lr": "2.00e-07", + "norm": 5.8933, + "step": 524 + }, + { + "acc_char": 0.3038, + "acc_token": 0.6833, + "epoch": 0.01, + "loss_char": 0.7208, + "loss_token": 1.6832, + "lr": "2.00e-07", + "norm": 5.7907, + "step": 525 + }, + { + "acc_char": 0.3054, + "acc_token": 0.6899, + "epoch": 0.01, + "loss_char": 0.7107, + "loss_token": 1.5754, + "lr": "2.00e-07", + "norm": 5.5543, + "step": 526 + }, + { + "acc_char": 0.3021, + "acc_token": 0.6727, + "epoch": 0.01, + "loss_char": 0.7716, + "loss_token": 1.6425, + "lr": "2.00e-07", + "norm": 6.5612, + "step": 527 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6779, + "epoch": 0.01, + "loss_char": 0.7249, + "loss_token": 1.5985, + "lr": "2.00e-07", + "norm": 5.6789, + "step": 528 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6786, + "epoch": 0.01, + "loss_char": 0.7245, + "loss_token": 1.6649, + "lr": "2.00e-07", + "norm": 5.9216, + "step": 529 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6999, + "epoch": 0.01, + "loss_char": 0.6842, + "loss_token": 1.5994, + "lr": "2.00e-07", + "norm": 5.4386, + "step": 530 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6736, + "epoch": 0.01, + "loss_char": 0.7615, + "loss_token": 1.5393, + "lr": "2.00e-07", + "norm": 5.4897, + "step": 531 + }, + { + "acc_char": 0.2958, + "acc_token": 0.6759, + "epoch": 0.01, + "loss_char": 0.7476, + "loss_token": 1.5885, + "lr": "2.00e-07", + "norm": 5.529, + "step": 532 + }, + { + "acc_char": 0.297, + "acc_token": 0.6895, + "epoch": 0.01, + "loss_char": 0.6842, + "loss_token": 1.5783, + "lr": "2.00e-07", + "norm": 5.5189, + "step": 533 + }, + { + "acc_char": 0.2983, + "acc_token": 0.6761, + "epoch": 0.01, + "loss_char": 0.7425, + "loss_token": 1.5862, + "lr": "2.00e-07", + "norm": 5.5887, + "step": 534 + }, + { + "acc_char": 0.3002, + "acc_token": 0.692, + "epoch": 0.01, + "loss_char": 0.6575, + "loss_token": 1.5723, + "lr": "2.00e-07", + "norm": 5.3558, + "step": 535 + }, + { + "acc_char": 0.298, + "acc_token": 0.676, + "epoch": 0.01, + "loss_char": 0.7515, + "loss_token": 1.6691, + "lr": "2.00e-07", + "norm": 6.0115, + "step": 536 + }, + { + "acc_char": 0.2924, + "acc_token": 0.674, + "epoch": 0.01, + "loss_char": 0.7274, + "loss_token": 1.611, + "lr": "2.00e-07", + "norm": 5.4644, + "step": 537 + }, + { + "acc_char": 0.3046, + "acc_token": 0.6955, + "epoch": 0.01, + "loss_char": 0.6839, + "loss_token": 1.6216, + "lr": "2.00e-07", + "norm": 5.4058, + "step": 538 + }, + { + "acc_char": 0.3, + "acc_token": 0.6712, + "epoch": 0.01, + "loss_char": 0.7657, + "loss_token": 1.745, + "lr": "2.00e-07", + "norm": 5.8636, + "step": 539 + }, + { + "acc_char": 0.3209, + "acc_token": 0.7264, + "epoch": 0.01, + "loss_char": 0.6182, + "loss_token": 1.5795, + "lr": "2.00e-07", + "norm": 5.4199, + "step": 540 + }, + { + "acc_char": 0.291, + "acc_token": 0.6599, + "epoch": 0.01, + "loss_char": 0.7919, + "loss_token": 1.7544, + "lr": "2.00e-07", + "norm": 5.7709, + "step": 541 + }, + { + "acc_char": 0.3051, + "acc_token": 0.6966, + "epoch": 0.01, + "loss_char": 0.6722, + "loss_token": 1.5357, + "lr": "2.00e-07", + "norm": 5.7007, + "step": 542 + }, + { + "acc_char": 0.3207, + "acc_token": 0.6919, + "epoch": 0.01, + "loss_char": 0.6804, + "loss_token": 1.5393, + "lr": "2.00e-07", + "norm": 6.2127, + "step": 543 + }, + { + "acc_char": 0.316, + "acc_token": 0.705, + "epoch": 0.01, + "loss_char": 0.6765, + "loss_token": 1.5082, + "lr": "2.00e-07", + "norm": 5.3352, + "step": 544 + }, + { + "acc_char": 0.3092, + "acc_token": 0.6964, + "epoch": 0.01, + "loss_char": 0.6843, + "loss_token": 1.5882, + "lr": "2.00e-07", + "norm": 5.6973, + "step": 545 + }, + { + "acc_char": 0.2907, + "acc_token": 0.6619, + "epoch": 0.01, + "loss_char": 0.7776, + "loss_token": 1.7012, + "lr": "2.00e-07", + "norm": 5.6625, + "step": 546 + }, + { + "acc_char": 0.3011, + "acc_token": 0.6715, + "epoch": 0.01, + "loss_char": 0.7724, + "loss_token": 1.7237, + "lr": "2.00e-07", + "norm": 5.8425, + "step": 547 + }, + { + "acc_char": 0.292, + "acc_token": 0.6715, + "epoch": 0.01, + "loss_char": 0.7631, + "loss_token": 1.6806, + "lr": "2.00e-07", + "norm": 5.5621, + "step": 548 + }, + { + "acc_char": 0.3137, + "acc_token": 0.7052, + "epoch": 0.01, + "loss_char": 0.6651, + "loss_token": 1.5382, + "lr": "2.00e-07", + "norm": 5.3519, + "step": 549 + }, + { + "acc_char": 0.3066, + "acc_token": 0.6953, + "epoch": 0.01, + "loss_char": 0.684, + "loss_token": 1.6192, + "lr": "2.00e-07", + "norm": 5.5761, + "step": 550 + }, + { + "acc_char": 0.3174, + "acc_token": 0.7004, + "epoch": 0.01, + "loss_char": 0.6984, + "loss_token": 1.5912, + "lr": "2.00e-07", + "norm": 5.7781, + "step": 551 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6731, + "epoch": 0.01, + "loss_char": 0.7512, + "loss_token": 1.6747, + "lr": "2.00e-07", + "norm": 5.8476, + "step": 552 + }, + { + "acc_char": 0.2734, + "acc_token": 0.6097, + "epoch": 0.01, + "loss_char": 0.8934, + "loss_token": 1.8362, + "lr": "2.00e-07", + "norm": 5.7851, + "step": 553 + }, + { + "acc_char": 0.3226, + "acc_token": 0.7002, + "epoch": 0.01, + "loss_char": 0.6766, + "loss_token": 1.5276, + "lr": "2.00e-07", + "norm": 5.8662, + "step": 554 + }, + { + "acc_char": 0.3191, + "acc_token": 0.7262, + "epoch": 0.01, + "loss_char": 0.6071, + "loss_token": 1.4902, + "lr": "2.00e-07", + "norm": 5.7735, + "step": 555 + }, + { + "acc_char": 0.2984, + "acc_token": 0.6792, + "epoch": 0.01, + "loss_char": 0.7263, + "loss_token": 1.6368, + "lr": "2.00e-07", + "norm": 5.5514, + "step": 556 + }, + { + "acc_char": 0.3112, + "acc_token": 0.6938, + "epoch": 0.01, + "loss_char": 0.7188, + "loss_token": 1.5125, + "lr": "2.00e-07", + "norm": 5.6774, + "step": 557 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6932, + "epoch": 0.01, + "loss_char": 0.683, + "loss_token": 1.5882, + "lr": "2.00e-07", + "norm": 5.5219, + "step": 558 + }, + { + "acc_char": 0.3024, + "acc_token": 0.6932, + "epoch": 0.01, + "loss_char": 0.7094, + "loss_token": 1.6385, + "lr": "2.00e-07", + "norm": 5.9762, + "step": 559 + }, + { + "acc_char": 0.2901, + "acc_token": 0.664, + "epoch": 0.01, + "loss_char": 0.7806, + "loss_token": 1.7459, + "lr": "2.00e-07", + "norm": 6.0142, + "step": 560 + }, + { + "acc_char": 0.3097, + "acc_token": 0.6902, + "epoch": 0.01, + "loss_char": 0.7173, + "loss_token": 1.6327, + "lr": "2.00e-07", + "norm": 6.0807, + "step": 561 + }, + { + "acc_char": 0.3026, + "acc_token": 0.6837, + "epoch": 0.01, + "loss_char": 0.7394, + "loss_token": 1.6375, + "lr": "2.00e-07", + "norm": 5.6845, + "step": 562 + }, + { + "acc_char": 0.296, + "acc_token": 0.676, + "epoch": 0.01, + "loss_char": 0.7309, + "loss_token": 1.6614, + "lr": "2.00e-07", + "norm": 5.6079, + "step": 563 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6811, + "epoch": 0.01, + "loss_char": 0.7016, + "loss_token": 1.5666, + "lr": "2.00e-07", + "norm": 5.4709, + "step": 564 + }, + { + "acc_char": 0.3028, + "acc_token": 0.6891, + "epoch": 0.01, + "loss_char": 0.7079, + "loss_token": 1.6339, + "lr": "2.00e-07", + "norm": 5.6625, + "step": 565 + }, + { + "acc_char": 0.2735, + "acc_token": 0.6008, + "epoch": 0.01, + "loss_char": 0.9197, + "loss_token": 1.8075, + "lr": "2.00e-07", + "norm": 5.5777, + "step": 566 + }, + { + "acc_char": 0.3075, + "acc_token": 0.6919, + "epoch": 0.01, + "loss_char": 0.7049, + "loss_token": 1.5574, + "lr": "2.00e-07", + "norm": 5.8756, + "step": 567 + }, + { + "acc_char": 0.3068, + "acc_token": 0.6863, + "epoch": 0.01, + "loss_char": 0.7152, + "loss_token": 1.7041, + "lr": "2.00e-07", + "norm": 5.9522, + "step": 568 + }, + { + "acc_char": 0.2755, + "acc_token": 0.6058, + "epoch": 0.01, + "loss_char": 0.9253, + "loss_token": 1.8017, + "lr": "2.00e-07", + "norm": 5.3494, + "step": 569 + }, + { + "acc_char": 0.295, + "acc_token": 0.6807, + "epoch": 0.01, + "loss_char": 0.709, + "loss_token": 1.6612, + "lr": "2.00e-07", + "norm": 5.6727, + "step": 570 + }, + { + "acc_char": 0.3051, + "acc_token": 0.6734, + "epoch": 0.01, + "loss_char": 0.7934, + "loss_token": 1.6827, + "lr": "2.00e-07", + "norm": 5.8125, + "step": 571 + }, + { + "acc_char": 0.2914, + "acc_token": 0.6652, + "epoch": 0.01, + "loss_char": 0.7623, + "loss_token": 1.6601, + "lr": "2.00e-07", + "norm": 5.7307, + "step": 572 + }, + { + "acc_char": 0.2923, + "acc_token": 0.674, + "epoch": 0.01, + "loss_char": 0.7213, + "loss_token": 1.7202, + "lr": "2.00e-07", + "norm": 6.0228, + "step": 573 + }, + { + "acc_char": 0.3059, + "acc_token": 0.6905, + "epoch": 0.01, + "loss_char": 0.7063, + "loss_token": 1.6059, + "lr": "2.00e-07", + "norm": 5.7055, + "step": 574 + }, + { + "acc_char": 0.2859, + "acc_token": 0.6212, + "epoch": 0.01, + "loss_char": 0.8812, + "loss_token": 1.7594, + "lr": "2.00e-07", + "norm": 5.642, + "step": 575 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6935, + "epoch": 0.01, + "loss_char": 0.6684, + "loss_token": 1.5689, + "lr": "2.00e-07", + "norm": 5.6313, + "step": 576 + }, + { + "acc_char": 0.3063, + "acc_token": 0.7125, + "epoch": 0.01, + "loss_char": 0.6309, + "loss_token": 1.593, + "lr": "2.00e-07", + "norm": 5.3535, + "step": 577 + }, + { + "acc_char": 0.299, + "acc_token": 0.6642, + "epoch": 0.01, + "loss_char": 0.7921, + "loss_token": 1.662, + "lr": "2.00e-07", + "norm": 5.6795, + "step": 578 + }, + { + "acc_char": 0.3058, + "acc_token": 0.6969, + "epoch": 0.01, + "loss_char": 0.6817, + "loss_token": 1.6438, + "lr": "2.00e-07", + "norm": 5.6116, + "step": 579 + }, + { + "acc_char": 0.292, + "acc_token": 0.6723, + "epoch": 0.01, + "loss_char": 0.7104, + "loss_token": 1.6338, + "lr": "2.00e-07", + "norm": 5.7615, + "step": 580 + }, + { + "acc_char": 0.297, + "acc_token": 0.6732, + "epoch": 0.01, + "loss_char": 0.7542, + "loss_token": 1.6422, + "lr": "2.00e-07", + "norm": 5.9272, + "step": 581 + }, + { + "acc_char": 0.2948, + "acc_token": 0.6641, + "epoch": 0.01, + "loss_char": 0.781, + "loss_token": 1.6998, + "lr": "2.00e-07", + "norm": 5.6837, + "step": 582 + }, + { + "acc_char": 0.3182, + "acc_token": 0.7073, + "epoch": 0.01, + "loss_char": 0.6854, + "loss_token": 1.5821, + "lr": "2.00e-07", + "norm": 5.727, + "step": 583 + }, + { + "acc_char": 0.2881, + "acc_token": 0.6246, + "epoch": 0.01, + "loss_char": 0.8612, + "loss_token": 1.7089, + "lr": "2.00e-07", + "norm": 5.4477, + "step": 584 + }, + { + "acc_char": 0.3289, + "acc_token": 0.6926, + "epoch": 0.01, + "loss_char": 0.7151, + "loss_token": 1.4939, + "lr": "2.00e-07", + "norm": 5.2305, + "step": 585 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6918, + "epoch": 0.01, + "loss_char": 0.6788, + "loss_token": 1.5943, + "lr": "2.00e-07", + "norm": 5.6058, + "step": 586 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6822, + "epoch": 0.01, + "loss_char": 0.7151, + "loss_token": 1.67, + "lr": "2.00e-07", + "norm": 5.5937, + "step": 587 + }, + { + "acc_char": 0.3001, + "acc_token": 0.6367, + "epoch": 0.01, + "loss_char": 0.8631, + "loss_token": 1.7495, + "lr": "2.00e-07", + "norm": 8.798, + "step": 588 + }, + { + "acc_char": 0.2933, + "acc_token": 0.6781, + "epoch": 0.01, + "loss_char": 0.7214, + "loss_token": 1.6052, + "lr": "2.00e-07", + "norm": 5.4492, + "step": 589 + }, + { + "acc_char": 0.3158, + "acc_token": 0.6998, + "epoch": 0.01, + "loss_char": 0.7083, + "loss_token": 1.5872, + "lr": "2.00e-07", + "norm": 5.6008, + "step": 590 + }, + { + "acc_char": 0.3111, + "acc_token": 0.6977, + "epoch": 0.01, + "loss_char": 0.6923, + "loss_token": 1.6226, + "lr": "2.00e-07", + "norm": 5.7109, + "step": 591 + }, + { + "acc_char": 0.2877, + "acc_token": 0.6725, + "epoch": 0.01, + "loss_char": 0.7402, + "loss_token": 1.6398, + "lr": "2.00e-07", + "norm": 5.629, + "step": 592 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6806, + "epoch": 0.01, + "loss_char": 0.7446, + "loss_token": 1.6434, + "lr": "2.00e-07", + "norm": 6.1502, + "step": 593 + }, + { + "acc_char": 0.31, + "acc_token": 0.6981, + "epoch": 0.01, + "loss_char": 0.6608, + "loss_token": 1.5804, + "lr": "2.00e-07", + "norm": 5.528, + "step": 594 + }, + { + "acc_char": 0.3065, + "acc_token": 0.6986, + "epoch": 0.01, + "loss_char": 0.6774, + "loss_token": 1.5434, + "lr": "2.00e-07", + "norm": 5.4188, + "step": 595 + }, + { + "acc_char": 0.2961, + "acc_token": 0.6391, + "epoch": 0.01, + "loss_char": 0.851, + "loss_token": 1.6733, + "lr": "2.00e-07", + "norm": 5.2518, + "step": 596 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6732, + "epoch": 0.01, + "loss_char": 0.7501, + "loss_token": 1.6025, + "lr": "2.00e-07", + "norm": 5.7633, + "step": 597 + }, + { + "acc_char": 0.3057, + "acc_token": 0.6638, + "epoch": 0.01, + "loss_char": 0.7828, + "loss_token": 1.6712, + "lr": "2.00e-07", + "norm": 5.4425, + "step": 598 + }, + { + "acc_char": 0.3001, + "acc_token": 0.6873, + "epoch": 0.01, + "loss_char": 0.6919, + "loss_token": 1.6011, + "lr": "2.00e-07", + "norm": 5.5889, + "step": 599 + }, + { + "acc_char": 0.3081, + "acc_token": 0.6991, + "epoch": 0.01, + "loss_char": 0.6603, + "loss_token": 1.5127, + "lr": "2.00e-07", + "norm": 5.6657, + "step": 600 + }, + { + "acc_char": 0.3022, + "acc_token": 0.6785, + "epoch": 0.01, + "loss_char": 0.7635, + "loss_token": 1.6259, + "lr": "2.00e-07", + "norm": 5.6799, + "step": 601 + }, + { + "acc_char": 0.3026, + "acc_token": 0.6963, + "epoch": 0.01, + "loss_char": 0.6647, + "loss_token": 1.5104, + "lr": "2.00e-07", + "norm": 5.58, + "step": 602 + }, + { + "acc_char": 0.3058, + "acc_token": 0.6903, + "epoch": 0.01, + "loss_char": 0.7082, + "loss_token": 1.513, + "lr": "2.00e-07", + "norm": 5.5287, + "step": 603 + }, + { + "acc_char": 0.2897, + "acc_token": 0.6295, + "epoch": 0.01, + "loss_char": 0.8635, + "loss_token": 1.744, + "lr": "2.00e-07", + "norm": 5.838, + "step": 604 + }, + { + "acc_char": 0.3073, + "acc_token": 0.7038, + "epoch": 0.01, + "loss_char": 0.6604, + "loss_token": 1.5419, + "lr": "2.00e-07", + "norm": 5.6741, + "step": 605 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6721, + "epoch": 0.01, + "loss_char": 0.7576, + "loss_token": 1.664, + "lr": "2.00e-07", + "norm": 6.1815, + "step": 606 + }, + { + "acc_char": 0.2971, + "acc_token": 0.6848, + "epoch": 0.01, + "loss_char": 0.7164, + "loss_token": 1.7081, + "lr": "2.00e-07", + "norm": 5.8114, + "step": 607 + }, + { + "acc_char": 0.2944, + "acc_token": 0.6658, + "epoch": 0.01, + "loss_char": 0.7991, + "loss_token": 1.7166, + "lr": "2.00e-07", + "norm": 5.7798, + "step": 608 + }, + { + "acc_char": 0.2876, + "acc_token": 0.6315, + "epoch": 0.01, + "loss_char": 0.8615, + "loss_token": 1.756, + "lr": "2.00e-07", + "norm": 5.2967, + "step": 609 + }, + { + "acc_char": 0.3101, + "acc_token": 0.7015, + "epoch": 0.01, + "loss_char": 0.6474, + "loss_token": 1.5666, + "lr": "2.00e-07", + "norm": 5.5296, + "step": 610 + }, + { + "acc_char": 0.3071, + "acc_token": 0.7117, + "epoch": 0.01, + "loss_char": 0.609, + "loss_token": 1.3597, + "lr": "2.00e-07", + "norm": 5.9792, + "step": 611 + }, + { + "acc_char": 0.3121, + "acc_token": 0.7013, + "epoch": 0.01, + "loss_char": 0.6796, + "loss_token": 1.5647, + "lr": "2.00e-07", + "norm": 5.7678, + "step": 612 + }, + { + "acc_char": 0.3108, + "acc_token": 0.7027, + "epoch": 0.01, + "loss_char": 0.6619, + "loss_token": 1.5006, + "lr": "2.00e-07", + "norm": 5.5738, + "step": 613 + }, + { + "acc_char": 0.3075, + "acc_token": 0.6938, + "epoch": 0.01, + "loss_char": 0.6819, + "loss_token": 1.5156, + "lr": "2.00e-07", + "norm": 5.315, + "step": 614 + }, + { + "acc_char": 0.3043, + "acc_token": 0.6782, + "epoch": 0.01, + "loss_char": 0.744, + "loss_token": 1.6767, + "lr": "2.00e-07", + "norm": 6.1106, + "step": 615 + }, + { + "acc_char": 0.2944, + "acc_token": 0.6681, + "epoch": 0.01, + "loss_char": 0.8069, + "loss_token": 1.7487, + "lr": "2.00e-07", + "norm": 6.015, + "step": 616 + }, + { + "acc_char": 0.3186, + "acc_token": 0.7171, + "epoch": 0.01, + "loss_char": 0.642, + "loss_token": 1.5414, + "lr": "2.00e-07", + "norm": 5.3756, + "step": 617 + }, + { + "acc_char": 0.3015, + "acc_token": 0.679, + "epoch": 0.01, + "loss_char": 0.7143, + "loss_token": 1.6086, + "lr": "2.00e-07", + "norm": 5.9773, + "step": 618 + }, + { + "acc_char": 0.3277, + "acc_token": 0.7303, + "epoch": 0.01, + "loss_char": 0.6219, + "loss_token": 1.5241, + "lr": "2.00e-07", + "norm": 5.9241, + "step": 619 + }, + { + "acc_char": 0.3184, + "acc_token": 0.7167, + "epoch": 0.01, + "loss_char": 0.6294, + "loss_token": 1.525, + "lr": "2.00e-07", + "norm": 5.6258, + "step": 620 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6806, + "epoch": 0.01, + "loss_char": 0.7262, + "loss_token": 1.6419, + "lr": "2.00e-07", + "norm": 5.6224, + "step": 621 + }, + { + "acc_char": 0.2978, + "acc_token": 0.6767, + "epoch": 0.01, + "loss_char": 0.7415, + "loss_token": 1.6946, + "lr": "2.00e-07", + "norm": 5.9768, + "step": 622 + }, + { + "acc_char": 0.2806, + "acc_token": 0.6274, + "epoch": 0.01, + "loss_char": 0.8257, + "loss_token": 1.6916, + "lr": "2.00e-07", + "norm": 5.191, + "step": 623 + }, + { + "acc_char": 0.3144, + "acc_token": 0.7105, + "epoch": 0.01, + "loss_char": 0.6586, + "loss_token": 1.5043, + "lr": "2.00e-07", + "norm": 5.4252, + "step": 624 + }, + { + "acc_char": 0.2839, + "acc_token": 0.6208, + "epoch": 0.01, + "loss_char": 0.8839, + "loss_token": 1.7884, + "lr": "2.00e-07", + "norm": 5.5514, + "step": 625 + }, + { + "acc_char": 0.3133, + "acc_token": 0.6975, + "epoch": 0.01, + "loss_char": 0.689, + "loss_token": 1.6005, + "lr": "2.00e-07", + "norm": 5.5656, + "step": 626 + }, + { + "acc_char": 0.2983, + "acc_token": 0.6831, + "epoch": 0.01, + "loss_char": 0.7115, + "loss_token": 1.5559, + "lr": "2.00e-07", + "norm": 5.4372, + "step": 627 + }, + { + "acc_char": 0.2926, + "acc_token": 0.6629, + "epoch": 0.01, + "loss_char": 0.7945, + "loss_token": 1.6611, + "lr": "2.00e-07", + "norm": 5.6938, + "step": 628 + }, + { + "acc_char": 0.2975, + "acc_token": 0.6745, + "epoch": 0.01, + "loss_char": 0.7395, + "loss_token": 1.6412, + "lr": "2.00e-07", + "norm": 5.7502, + "step": 629 + }, + { + "acc_char": 0.3138, + "acc_token": 0.7, + "epoch": 0.01, + "loss_char": 0.6856, + "loss_token": 1.6453, + "lr": "2.00e-07", + "norm": 5.8189, + "step": 630 + }, + { + "acc_char": 0.3096, + "acc_token": 0.6971, + "epoch": 0.01, + "loss_char": 0.7, + "loss_token": 1.576, + "lr": "2.00e-07", + "norm": 5.8166, + "step": 631 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6913, + "epoch": 0.01, + "loss_char": 0.6716, + "loss_token": 1.623, + "lr": "2.00e-07", + "norm": 5.5634, + "step": 632 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6753, + "epoch": 0.01, + "loss_char": 0.7604, + "loss_token": 1.5291, + "lr": "2.00e-07", + "norm": 5.5975, + "step": 633 + }, + { + "acc_char": 0.3103, + "acc_token": 0.6774, + "epoch": 0.01, + "loss_char": 0.7568, + "loss_token": 1.6828, + "lr": "2.00e-07", + "norm": 6.1007, + "step": 634 + }, + { + "acc_char": 0.2999, + "acc_token": 0.674, + "epoch": 0.01, + "loss_char": 0.7689, + "loss_token": 1.6685, + "lr": "2.00e-07", + "norm": 5.8519, + "step": 635 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6732, + "epoch": 0.01, + "loss_char": 0.7638, + "loss_token": 1.648, + "lr": "2.00e-07", + "norm": 6.0581, + "step": 636 + }, + { + "acc_char": 0.2949, + "acc_token": 0.659, + "epoch": 0.01, + "loss_char": 0.8081, + "loss_token": 1.7417, + "lr": "2.00e-07", + "norm": 5.8723, + "step": 637 + }, + { + "acc_char": 0.296, + "acc_token": 0.6734, + "epoch": 0.01, + "loss_char": 0.7464, + "loss_token": 1.6751, + "lr": "2.00e-07", + "norm": 5.7903, + "step": 638 + }, + { + "acc_char": 0.2949, + "acc_token": 0.6709, + "epoch": 0.01, + "loss_char": 0.7442, + "loss_token": 1.6243, + "lr": "2.00e-07", + "norm": 6.1123, + "step": 639 + }, + { + "acc_char": 0.3115, + "acc_token": 0.695, + "epoch": 0.01, + "loss_char": 0.6904, + "loss_token": 1.6397, + "lr": "2.00e-07", + "norm": 5.9206, + "step": 640 + }, + { + "acc_char": 0.3042, + "acc_token": 0.682, + "epoch": 0.01, + "loss_char": 0.7441, + "loss_token": 1.658, + "lr": "2.00e-07", + "norm": 6.0555, + "step": 641 + }, + { + "acc_char": 0.2977, + "acc_token": 0.6757, + "epoch": 0.01, + "loss_char": 0.7398, + "loss_token": 1.6909, + "lr": "2.00e-07", + "norm": 5.7038, + "step": 642 + }, + { + "acc_char": 0.2856, + "acc_token": 0.6136, + "epoch": 0.01, + "loss_char": 0.9157, + "loss_token": 1.7935, + "lr": "2.00e-07", + "norm": 5.842, + "step": 643 + }, + { + "acc_char": 0.3013, + "acc_token": 0.6897, + "epoch": 0.01, + "loss_char": 0.6881, + "loss_token": 1.6058, + "lr": "2.00e-07", + "norm": 5.7919, + "step": 644 + }, + { + "acc_char": 0.2962, + "acc_token": 0.6715, + "epoch": 0.01, + "loss_char": 0.7371, + "loss_token": 1.6975, + "lr": "2.00e-07", + "norm": 5.8936, + "step": 645 + }, + { + "acc_char": 0.302, + "acc_token": 0.6913, + "epoch": 0.01, + "loss_char": 0.6837, + "loss_token": 1.6074, + "lr": "2.00e-07", + "norm": 5.7401, + "step": 646 + }, + { + "acc_char": 0.3001, + "acc_token": 0.6957, + "epoch": 0.01, + "loss_char": 0.6536, + "loss_token": 1.4661, + "lr": "2.00e-07", + "norm": 5.4626, + "step": 647 + }, + { + "acc_char": 0.2965, + "acc_token": 0.6724, + "epoch": 0.01, + "loss_char": 0.7533, + "loss_token": 1.6964, + "lr": "2.00e-07", + "norm": 5.8447, + "step": 648 + }, + { + "acc_char": 0.3172, + "acc_token": 0.7063, + "epoch": 0.01, + "loss_char": 0.6653, + "loss_token": 1.569, + "lr": "2.00e-07", + "norm": 5.7856, + "step": 649 + }, + { + "acc_char": 0.3075, + "acc_token": 0.6877, + "epoch": 0.01, + "loss_char": 0.7405, + "loss_token": 1.6241, + "lr": "2.00e-07", + "norm": 5.8337, + "step": 650 + }, + { + "acc_char": 0.3189, + "acc_token": 0.7198, + "epoch": 0.01, + "loss_char": 0.6324, + "loss_token": 1.5076, + "lr": "2.00e-07", + "norm": 5.5855, + "step": 651 + }, + { + "acc_char": 0.2925, + "acc_token": 0.6688, + "epoch": 0.01, + "loss_char": 0.7463, + "loss_token": 1.6756, + "lr": "2.00e-07", + "norm": 5.5517, + "step": 652 + }, + { + "acc_char": 0.2955, + "acc_token": 0.6403, + "epoch": 0.01, + "loss_char": 0.8404, + "loss_token": 1.7651, + "lr": "2.00e-07", + "norm": 5.3183, + "step": 653 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6892, + "epoch": 0.01, + "loss_char": 0.7223, + "loss_token": 1.6726, + "lr": "2.00e-07", + "norm": 5.9224, + "step": 654 + }, + { + "acc_char": 0.3103, + "acc_token": 0.6965, + "epoch": 0.01, + "loss_char": 0.687, + "loss_token": 1.5716, + "lr": "2.00e-07", + "norm": 5.8924, + "step": 655 + }, + { + "acc_char": 0.2892, + "acc_token": 0.6546, + "epoch": 0.01, + "loss_char": 0.7878, + "loss_token": 1.6663, + "lr": "2.00e-07", + "norm": 5.677, + "step": 656 + }, + { + "acc_char": 0.3089, + "acc_token": 0.7057, + "epoch": 0.01, + "loss_char": 0.6499, + "loss_token": 1.5976, + "lr": "2.00e-07", + "norm": 5.5667, + "step": 657 + }, + { + "acc_char": 0.3024, + "acc_token": 0.6709, + "epoch": 0.01, + "loss_char": 0.7571, + "loss_token": 1.6129, + "lr": "2.00e-07", + "norm": 5.9277, + "step": 658 + }, + { + "acc_char": 0.3105, + "acc_token": 0.7038, + "epoch": 0.01, + "loss_char": 0.673, + "loss_token": 1.6015, + "lr": "2.00e-07", + "norm": 5.6025, + "step": 659 + }, + { + "acc_char": 0.2913, + "acc_token": 0.6678, + "epoch": 0.01, + "loss_char": 0.7561, + "loss_token": 1.6811, + "lr": "2.00e-07", + "norm": 6.2516, + "step": 660 + }, + { + "acc_char": 0.3001, + "acc_token": 0.6993, + "epoch": 0.01, + "loss_char": 0.6578, + "loss_token": 1.5482, + "lr": "2.00e-07", + "norm": 5.6221, + "step": 661 + }, + { + "acc_char": 0.3069, + "acc_token": 0.6845, + "epoch": 0.01, + "loss_char": 0.7393, + "loss_token": 1.6031, + "lr": "2.00e-07", + "norm": 5.45, + "step": 662 + }, + { + "acc_char": 0.3284, + "acc_token": 0.7275, + "epoch": 0.01, + "loss_char": 0.6488, + "loss_token": 1.5209, + "lr": "2.00e-07", + "norm": 5.5336, + "step": 663 + }, + { + "acc_char": 0.3029, + "acc_token": 0.6869, + "epoch": 0.01, + "loss_char": 0.7221, + "loss_token": 1.6319, + "lr": "2.00e-07", + "norm": 5.9041, + "step": 664 + }, + { + "acc_char": 0.2964, + "acc_token": 0.671, + "epoch": 0.01, + "loss_char": 0.7496, + "loss_token": 1.5912, + "lr": "2.00e-07", + "norm": 5.6728, + "step": 665 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6947, + "epoch": 0.01, + "loss_char": 0.6923, + "loss_token": 1.5917, + "lr": "2.00e-07", + "norm": 5.6104, + "step": 666 + }, + { + "acc_char": 0.301, + "acc_token": 0.6766, + "epoch": 0.01, + "loss_char": 0.7448, + "loss_token": 1.5958, + "lr": "2.00e-07", + "norm": 5.7438, + "step": 667 + }, + { + "acc_char": 0.3047, + "acc_token": 0.684, + "epoch": 0.01, + "loss_char": 0.7429, + "loss_token": 1.715, + "lr": "2.00e-07", + "norm": 5.9348, + "step": 668 + }, + { + "acc_char": 0.2961, + "acc_token": 0.6706, + "epoch": 0.01, + "loss_char": 0.7799, + "loss_token": 1.6612, + "lr": "2.00e-07", + "norm": 5.7273, + "step": 669 + }, + { + "acc_char": 0.3023, + "acc_token": 0.6895, + "epoch": 0.01, + "loss_char": 0.6908, + "loss_token": 1.5717, + "lr": "2.00e-07", + "norm": 5.6183, + "step": 670 + }, + { + "acc_char": 0.3234, + "acc_token": 0.7105, + "epoch": 0.01, + "loss_char": 0.6642, + "loss_token": 1.5869, + "lr": "2.00e-07", + "norm": 5.6473, + "step": 671 + }, + { + "acc_char": 0.3022, + "acc_token": 0.694, + "epoch": 0.01, + "loss_char": 0.6711, + "loss_token": 1.5884, + "lr": "2.00e-07", + "norm": 5.3875, + "step": 672 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6902, + "epoch": 0.01, + "loss_char": 0.7018, + "loss_token": 1.5795, + "lr": "2.00e-07", + "norm": 5.7039, + "step": 673 + }, + { + "acc_char": 0.3078, + "acc_token": 0.6989, + "epoch": 0.01, + "loss_char": 0.6835, + "loss_token": 1.5994, + "lr": "2.00e-07", + "norm": 5.6163, + "step": 674 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6877, + "epoch": 0.01, + "loss_char": 0.7003, + "loss_token": 1.581, + "lr": "2.00e-07", + "norm": 5.638, + "step": 675 + }, + { + "acc_char": 0.2879, + "acc_token": 0.661, + "epoch": 0.01, + "loss_char": 0.7901, + "loss_token": 1.7105, + "lr": "2.00e-07", + "norm": 5.9664, + "step": 676 + }, + { + "acc_char": 0.2932, + "acc_token": 0.6609, + "epoch": 0.01, + "loss_char": 0.8064, + "loss_token": 1.7189, + "lr": "2.00e-07", + "norm": 5.7921, + "step": 677 + }, + { + "acc_char": 0.3141, + "acc_token": 0.6946, + "epoch": 0.01, + "loss_char": 0.6943, + "loss_token": 1.5659, + "lr": "2.00e-07", + "norm": 5.6831, + "step": 678 + }, + { + "acc_char": 0.3159, + "acc_token": 0.7043, + "epoch": 0.01, + "loss_char": 0.6686, + "loss_token": 1.5816, + "lr": "2.00e-07", + "norm": 5.5089, + "step": 679 + }, + { + "acc_char": 0.3145, + "acc_token": 0.7022, + "epoch": 0.01, + "loss_char": 0.667, + "loss_token": 1.533, + "lr": "2.00e-07", + "norm": 5.797, + "step": 680 + }, + { + "acc_char": 0.3081, + "acc_token": 0.6984, + "epoch": 0.01, + "loss_char": 0.6948, + "loss_token": 1.4594, + "lr": "2.00e-07", + "norm": 5.7751, + "step": 681 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6793, + "epoch": 0.01, + "loss_char": 0.7456, + "loss_token": 1.712, + "lr": "2.00e-07", + "norm": 6.2523, + "step": 682 + }, + { + "acc_char": 0.3072, + "acc_token": 0.6903, + "epoch": 0.01, + "loss_char": 0.7033, + "loss_token": 1.6286, + "lr": "2.00e-07", + "norm": 6.0363, + "step": 683 + }, + { + "acc_char": 0.319, + "acc_token": 0.71, + "epoch": 0.01, + "loss_char": 0.6502, + "loss_token": 1.509, + "lr": "2.00e-07", + "norm": 5.4379, + "step": 684 + }, + { + "acc_char": 0.2994, + "acc_token": 0.6809, + "epoch": 0.01, + "loss_char": 0.7212, + "loss_token": 1.6315, + "lr": "2.00e-07", + "norm": 5.6878, + "step": 685 + }, + { + "acc_char": 0.3003, + "acc_token": 0.692, + "epoch": 0.01, + "loss_char": 0.6758, + "loss_token": 1.5584, + "lr": "2.00e-07", + "norm": 5.3446, + "step": 686 + }, + { + "acc_char": 0.2998, + "acc_token": 0.6789, + "epoch": 0.01, + "loss_char": 0.7216, + "loss_token": 1.6356, + "lr": "2.00e-07", + "norm": 5.8065, + "step": 687 + }, + { + "acc_char": 0.3075, + "acc_token": 0.6532, + "epoch": 0.01, + "loss_char": 0.8291, + "loss_token": 1.6488, + "lr": "2.00e-07", + "norm": 5.4422, + "step": 688 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6787, + "epoch": 0.01, + "loss_char": 0.7095, + "loss_token": 1.5677, + "lr": "2.00e-07", + "norm": 5.7396, + "step": 689 + }, + { + "acc_char": 0.3176, + "acc_token": 0.7026, + "epoch": 0.01, + "loss_char": 0.6848, + "loss_token": 1.5056, + "lr": "2.00e-07", + "norm": 5.636, + "step": 690 + }, + { + "acc_char": 0.3101, + "acc_token": 0.6997, + "epoch": 0.01, + "loss_char": 0.6909, + "loss_token": 1.6358, + "lr": "2.00e-07", + "norm": 5.7493, + "step": 691 + }, + { + "acc_char": 0.3137, + "acc_token": 0.7156, + "epoch": 0.01, + "loss_char": 0.6279, + "loss_token": 1.537, + "lr": "2.00e-07", + "norm": 5.8835, + "step": 692 + }, + { + "acc_char": 0.2932, + "acc_token": 0.6654, + "epoch": 0.01, + "loss_char": 0.7754, + "loss_token": 1.6879, + "lr": "2.00e-07", + "norm": 5.8848, + "step": 693 + }, + { + "acc_char": 0.3011, + "acc_token": 0.6875, + "epoch": 0.01, + "loss_char": 0.7015, + "loss_token": 1.6103, + "lr": "2.00e-07", + "norm": 5.6533, + "step": 694 + }, + { + "acc_char": 0.2931, + "acc_token": 0.6376, + "epoch": 0.01, + "loss_char": 0.8559, + "loss_token": 1.7466, + "lr": "2.00e-07", + "norm": 5.4473, + "step": 695 + }, + { + "acc_char": 0.2941, + "acc_token": 0.6733, + "epoch": 0.01, + "loss_char": 0.7255, + "loss_token": 1.635, + "lr": "2.00e-07", + "norm": 5.7071, + "step": 696 + }, + { + "acc_char": 0.3102, + "acc_token": 0.6983, + "epoch": 0.01, + "loss_char": 0.6859, + "loss_token": 1.5594, + "lr": "2.00e-07", + "norm": 5.4668, + "step": 697 + }, + { + "acc_char": 0.3086, + "acc_token": 0.6872, + "epoch": 0.01, + "loss_char": 0.7307, + "loss_token": 1.6843, + "lr": "2.00e-07", + "norm": 5.5325, + "step": 698 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6743, + "epoch": 0.01, + "loss_char": 0.7607, + "loss_token": 1.6332, + "lr": "2.00e-07", + "norm": 5.5354, + "step": 699 + }, + { + "acc_char": 0.2952, + "acc_token": 0.6822, + "epoch": 0.01, + "loss_char": 0.7013, + "loss_token": 1.6426, + "lr": "2.00e-07", + "norm": 5.5413, + "step": 700 + }, + { + "acc_char": 0.304, + "acc_token": 0.6866, + "epoch": 0.01, + "loss_char": 0.714, + "loss_token": 1.6075, + "lr": "2.00e-07", + "norm": 5.6022, + "step": 701 + }, + { + "acc_char": 0.2911, + "acc_token": 0.6571, + "epoch": 0.01, + "loss_char": 0.8123, + "loss_token": 1.6884, + "lr": "2.00e-07", + "norm": 5.6951, + "step": 702 + }, + { + "acc_char": 0.2935, + "acc_token": 0.6654, + "epoch": 0.01, + "loss_char": 0.7613, + "loss_token": 1.6945, + "lr": "2.00e-07", + "norm": 5.931, + "step": 703 + }, + { + "acc_char": 0.2941, + "acc_token": 0.6635, + "epoch": 0.01, + "loss_char": 0.765, + "loss_token": 1.7128, + "lr": "2.00e-07", + "norm": 6.0656, + "step": 704 + }, + { + "acc_char": 0.3081, + "acc_token": 0.6853, + "epoch": 0.01, + "loss_char": 0.7405, + "loss_token": 1.7216, + "lr": "2.00e-07", + "norm": 5.7774, + "step": 705 + }, + { + "acc_char": 0.3127, + "acc_token": 0.6943, + "epoch": 0.01, + "loss_char": 0.7052, + "loss_token": 1.6332, + "lr": "2.00e-07", + "norm": 5.8129, + "step": 706 + }, + { + "acc_char": 0.304, + "acc_token": 0.6762, + "epoch": 0.01, + "loss_char": 0.7689, + "loss_token": 1.6594, + "lr": "2.00e-07", + "norm": 5.8423, + "step": 707 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6949, + "epoch": 0.01, + "loss_char": 0.6868, + "loss_token": 1.6849, + "lr": "2.00e-07", + "norm": 5.7876, + "step": 708 + }, + { + "acc_char": 0.3085, + "acc_token": 0.6924, + "epoch": 0.01, + "loss_char": 0.6746, + "loss_token": 1.5821, + "lr": "2.00e-07", + "norm": 5.5369, + "step": 709 + }, + { + "acc_char": 0.294, + "acc_token": 0.6777, + "epoch": 0.01, + "loss_char": 0.7509, + "loss_token": 1.7132, + "lr": "2.00e-07", + "norm": 5.7105, + "step": 710 + }, + { + "acc_char": 0.3187, + "acc_token": 0.7084, + "epoch": 0.01, + "loss_char": 0.6792, + "loss_token": 1.5886, + "lr": "2.00e-07", + "norm": 5.4638, + "step": 711 + }, + { + "acc_char": 0.3075, + "acc_token": 0.693, + "epoch": 0.01, + "loss_char": 0.7031, + "loss_token": 1.6067, + "lr": "2.00e-07", + "norm": 5.5524, + "step": 712 + }, + { + "acc_char": 0.2849, + "acc_token": 0.6219, + "epoch": 0.01, + "loss_char": 0.927, + "loss_token": 1.8086, + "lr": "2.00e-07", + "norm": 5.673, + "step": 713 + }, + { + "acc_char": 0.2773, + "acc_token": 0.6437, + "epoch": 0.01, + "loss_char": 0.8132, + "loss_token": 1.7713, + "lr": "2.00e-07", + "norm": 5.9949, + "step": 714 + }, + { + "acc_char": 0.3059, + "acc_token": 0.6888, + "epoch": 0.01, + "loss_char": 0.7081, + "loss_token": 1.5482, + "lr": "2.00e-07", + "norm": 5.521, + "step": 715 + }, + { + "acc_char": 0.308, + "acc_token": 0.693, + "epoch": 0.01, + "loss_char": 0.7066, + "loss_token": 1.6414, + "lr": "2.00e-07", + "norm": 5.5016, + "step": 716 + }, + { + "acc_char": 0.3138, + "acc_token": 0.6991, + "epoch": 0.01, + "loss_char": 0.6901, + "loss_token": 1.5942, + "lr": "2.00e-07", + "norm": 5.4398, + "step": 717 + }, + { + "acc_char": 0.3008, + "acc_token": 0.6882, + "epoch": 0.01, + "loss_char": 0.6941, + "loss_token": 1.6136, + "lr": "2.00e-07", + "norm": 5.9947, + "step": 718 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6727, + "epoch": 0.01, + "loss_char": 0.7845, + "loss_token": 1.7138, + "lr": "2.00e-07", + "norm": 5.8203, + "step": 719 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6859, + "epoch": 0.01, + "loss_char": 0.7195, + "loss_token": 1.5991, + "lr": "2.00e-07", + "norm": 5.6985, + "step": 720 + }, + { + "acc_char": 0.2933, + "acc_token": 0.6239, + "epoch": 0.01, + "loss_char": 0.9131, + "loss_token": 1.6806, + "lr": "2.00e-07", + "norm": 5.6157, + "step": 721 + }, + { + "acc_char": 0.3045, + "acc_token": 0.7004, + "epoch": 0.01, + "loss_char": 0.6579, + "loss_token": 1.5557, + "lr": "2.00e-07", + "norm": 5.5476, + "step": 722 + }, + { + "acc_char": 0.325, + "acc_token": 0.728, + "epoch": 0.01, + "loss_char": 0.6151, + "loss_token": 1.445, + "lr": "2.00e-07", + "norm": 5.3312, + "step": 723 + }, + { + "acc_char": 0.3197, + "acc_token": 0.7111, + "epoch": 0.01, + "loss_char": 0.6784, + "loss_token": 1.46, + "lr": "2.00e-07", + "norm": 5.5799, + "step": 724 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6815, + "epoch": 0.01, + "loss_char": 0.7086, + "loss_token": 1.6515, + "lr": "2.00e-07", + "norm": 5.9712, + "step": 725 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6724, + "epoch": 0.01, + "loss_char": 0.763, + "loss_token": 1.7044, + "lr": "2.00e-07", + "norm": 5.7853, + "step": 726 + }, + { + "acc_char": 0.3038, + "acc_token": 0.6934, + "epoch": 0.01, + "loss_char": 0.6967, + "loss_token": 1.572, + "lr": "2.00e-07", + "norm": 5.8128, + "step": 727 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6949, + "epoch": 0.01, + "loss_char": 0.6848, + "loss_token": 1.522, + "lr": "2.00e-07", + "norm": 5.7481, + "step": 728 + }, + { + "acc_char": 0.3226, + "acc_token": 0.7245, + "epoch": 0.01, + "loss_char": 0.6269, + "loss_token": 1.5333, + "lr": "2.00e-07", + "norm": 5.6008, + "step": 729 + }, + { + "acc_char": 0.3065, + "acc_token": 0.6905, + "epoch": 0.01, + "loss_char": 0.7018, + "loss_token": 1.6624, + "lr": "2.00e-07", + "norm": 6.1109, + "step": 730 + }, + { + "acc_char": 0.3067, + "acc_token": 0.6836, + "epoch": 0.01, + "loss_char": 0.7501, + "loss_token": 1.6427, + "lr": "2.00e-07", + "norm": 6.2004, + "step": 731 + }, + { + "acc_char": 0.2936, + "acc_token": 0.6625, + "epoch": 0.01, + "loss_char": 0.7793, + "loss_token": 1.6704, + "lr": "2.00e-07", + "norm": 5.7747, + "step": 732 + }, + { + "acc_char": 0.297, + "acc_token": 0.6706, + "epoch": 0.01, + "loss_char": 0.7602, + "loss_token": 1.6386, + "lr": "2.00e-07", + "norm": 5.8706, + "step": 733 + }, + { + "acc_char": 0.3177, + "acc_token": 0.7132, + "epoch": 0.01, + "loss_char": 0.6471, + "loss_token": 1.5085, + "lr": "2.00e-07", + "norm": 5.4413, + "step": 734 + }, + { + "acc_char": 0.3092, + "acc_token": 0.7031, + "epoch": 0.01, + "loss_char": 0.6671, + "loss_token": 1.6531, + "lr": "2.00e-07", + "norm": 6.0109, + "step": 735 + }, + { + "acc_char": 0.3015, + "acc_token": 0.6878, + "epoch": 0.01, + "loss_char": 0.6874, + "loss_token": 1.6372, + "lr": "2.00e-07", + "norm": 5.596, + "step": 736 + }, + { + "acc_char": 0.3057, + "acc_token": 0.6861, + "epoch": 0.01, + "loss_char": 0.7389, + "loss_token": 1.6141, + "lr": "2.00e-07", + "norm": 5.8621, + "step": 737 + }, + { + "acc_char": 0.3023, + "acc_token": 0.6911, + "epoch": 0.01, + "loss_char": 0.7052, + "loss_token": 1.5856, + "lr": "2.00e-07", + "norm": 5.5047, + "step": 738 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6785, + "epoch": 0.01, + "loss_char": 0.752, + "loss_token": 1.6367, + "lr": "2.00e-07", + "norm": 5.5477, + "step": 739 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6987, + "epoch": 0.01, + "loss_char": 0.6865, + "loss_token": 1.5974, + "lr": "2.00e-07", + "norm": 6.0687, + "step": 740 + }, + { + "acc_char": 0.3089, + "acc_token": 0.6936, + "epoch": 0.01, + "loss_char": 0.7127, + "loss_token": 1.6018, + "lr": "2.00e-07", + "norm": 5.746, + "step": 741 + }, + { + "acc_char": 0.3024, + "acc_token": 0.6732, + "epoch": 0.01, + "loss_char": 0.7528, + "loss_token": 1.677, + "lr": "2.00e-07", + "norm": 5.7916, + "step": 742 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6589, + "epoch": 0.01, + "loss_char": 0.8215, + "loss_token": 1.7048, + "lr": "2.00e-07", + "norm": 5.8918, + "step": 743 + }, + { + "acc_char": 0.3012, + "acc_token": 0.6779, + "epoch": 0.01, + "loss_char": 0.7707, + "loss_token": 1.6728, + "lr": "2.00e-07", + "norm": 5.7565, + "step": 744 + }, + { + "acc_char": 0.3041, + "acc_token": 0.6828, + "epoch": 0.01, + "loss_char": 0.7289, + "loss_token": 1.5819, + "lr": "2.00e-07", + "norm": 5.6077, + "step": 745 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6637, + "epoch": 0.01, + "loss_char": 0.7989, + "loss_token": 1.6926, + "lr": "2.00e-07", + "norm": 5.8018, + "step": 746 + }, + { + "acc_char": 0.3197, + "acc_token": 0.7233, + "epoch": 0.01, + "loss_char": 0.6119, + "loss_token": 1.6102, + "lr": "2.00e-07", + "norm": 5.7715, + "step": 747 + }, + { + "acc_char": 0.3048, + "acc_token": 0.6811, + "epoch": 0.01, + "loss_char": 0.7491, + "loss_token": 1.6751, + "lr": "2.00e-07", + "norm": 5.7565, + "step": 748 + }, + { + "acc_char": 0.3144, + "acc_token": 0.706, + "epoch": 0.01, + "loss_char": 0.6729, + "loss_token": 1.5566, + "lr": "2.00e-07", + "norm": 5.6953, + "step": 749 + }, + { + "acc_char": 0.3123, + "acc_token": 0.6989, + "epoch": 0.01, + "loss_char": 0.6951, + "loss_token": 1.5327, + "lr": "2.00e-07", + "norm": 5.5628, + "step": 750 + }, + { + "acc_char": 0.3054, + "acc_token": 0.7036, + "epoch": 0.01, + "loss_char": 0.6472, + "loss_token": 1.5634, + "lr": "2.00e-07", + "norm": 5.4744, + "step": 751 + }, + { + "acc_char": 0.2984, + "acc_token": 0.675, + "epoch": 0.01, + "loss_char": 0.7445, + "loss_token": 1.643, + "lr": "2.00e-07", + "norm": 5.8631, + "step": 752 + }, + { + "acc_char": 0.3052, + "acc_token": 0.6844, + "epoch": 0.01, + "loss_char": 0.7517, + "loss_token": 1.6013, + "lr": "2.00e-07", + "norm": 6.0664, + "step": 753 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6907, + "epoch": 0.01, + "loss_char": 0.6976, + "loss_token": 1.6298, + "lr": "2.00e-07", + "norm": 5.4927, + "step": 754 + }, + { + "acc_char": 0.2969, + "acc_token": 0.6864, + "epoch": 0.01, + "loss_char": 0.7042, + "loss_token": 1.5909, + "lr": "2.00e-07", + "norm": 5.7039, + "step": 755 + }, + { + "acc_char": 0.2922, + "acc_token": 0.6709, + "epoch": 0.01, + "loss_char": 0.7614, + "loss_token": 1.6731, + "lr": "2.00e-07", + "norm": 5.6499, + "step": 756 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6751, + "epoch": 0.01, + "loss_char": 0.7543, + "loss_token": 1.7016, + "lr": "2.00e-07", + "norm": 5.9883, + "step": 757 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6894, + "epoch": 0.01, + "loss_char": 0.7218, + "loss_token": 1.6053, + "lr": "2.00e-07", + "norm": 5.7038, + "step": 758 + }, + { + "acc_char": 0.3117, + "acc_token": 0.6955, + "epoch": 0.01, + "loss_char": 0.6966, + "loss_token": 1.517, + "lr": "2.00e-07", + "norm": 5.3883, + "step": 759 + }, + { + "acc_char": 0.3063, + "acc_token": 0.693, + "epoch": 0.01, + "loss_char": 0.7054, + "loss_token": 1.5747, + "lr": "2.00e-07", + "norm": 5.5406, + "step": 760 + }, + { + "acc_char": 0.299, + "acc_token": 0.6792, + "epoch": 0.01, + "loss_char": 0.731, + "loss_token": 1.6095, + "lr": "2.00e-07", + "norm": 5.4993, + "step": 761 + }, + { + "acc_char": 0.3368, + "acc_token": 0.7164, + "epoch": 0.01, + "loss_char": 0.6217, + "loss_token": 1.5015, + "lr": "2.00e-07", + "norm": 5.1857, + "step": 762 + }, + { + "acc_char": 0.32, + "acc_token": 0.7251, + "epoch": 0.01, + "loss_char": 0.6227, + "loss_token": 1.5617, + "lr": "2.00e-07", + "norm": 5.6146, + "step": 763 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6952, + "epoch": 0.01, + "loss_char": 0.6669, + "loss_token": 1.5794, + "lr": "2.00e-07", + "norm": 5.564, + "step": 764 + }, + { + "acc_char": 0.2932, + "acc_token": 0.6711, + "epoch": 0.01, + "loss_char": 0.7361, + "loss_token": 1.6644, + "lr": "2.00e-07", + "norm": 5.783, + "step": 765 + }, + { + "acc_char": 0.3053, + "acc_token": 0.684, + "epoch": 0.01, + "loss_char": 0.7416, + "loss_token": 1.6462, + "lr": "2.00e-07", + "norm": 5.962, + "step": 766 + }, + { + "acc_char": 0.3112, + "acc_token": 0.6991, + "epoch": 0.01, + "loss_char": 0.6947, + "loss_token": 1.5991, + "lr": "2.00e-07", + "norm": 5.9584, + "step": 767 + }, + { + "acc_char": 0.3141, + "acc_token": 0.7128, + "epoch": 0.01, + "loss_char": 0.6485, + "loss_token": 1.5699, + "lr": "2.00e-07", + "norm": 5.4834, + "step": 768 + }, + { + "acc_char": 0.3241, + "acc_token": 0.7386, + "epoch": 0.01, + "loss_char": 0.5885, + "loss_token": 1.5736, + "lr": "2.00e-07", + "norm": 5.707, + "step": 769 + }, + { + "acc_char": 0.3106, + "acc_token": 0.6966, + "epoch": 0.01, + "loss_char": 0.6809, + "loss_token": 1.6651, + "lr": "2.00e-07", + "norm": 6.4674, + "step": 770 + }, + { + "acc_char": 0.3109, + "acc_token": 0.7015, + "epoch": 0.01, + "loss_char": 0.6787, + "loss_token": 1.5798, + "lr": "2.00e-07", + "norm": 5.4152, + "step": 771 + }, + { + "acc_char": 0.2791, + "acc_token": 0.6064, + "epoch": 0.01, + "loss_char": 0.9144, + "loss_token": 1.7602, + "lr": "2.00e-07", + "norm": 5.3641, + "step": 772 + }, + { + "acc_char": 0.3082, + "acc_token": 0.6722, + "epoch": 0.01, + "loss_char": 0.7471, + "loss_token": 1.6349, + "lr": "2.00e-07", + "norm": 5.1329, + "step": 773 + }, + { + "acc_char": 0.2971, + "acc_token": 0.6746, + "epoch": 0.01, + "loss_char": 0.7329, + "loss_token": 1.7143, + "lr": "2.00e-07", + "norm": 5.9449, + "step": 774 + }, + { + "acc_char": 0.2802, + "acc_token": 0.6183, + "epoch": 0.01, + "loss_char": 0.8729, + "loss_token": 1.7259, + "lr": "2.00e-07", + "norm": 5.5167, + "step": 775 + }, + { + "acc_char": 0.3069, + "acc_token": 0.6942, + "epoch": 0.01, + "loss_char": 0.6939, + "loss_token": 1.653, + "lr": "2.00e-07", + "norm": 5.749, + "step": 776 + }, + { + "acc_char": 0.2951, + "acc_token": 0.6668, + "epoch": 0.01, + "loss_char": 0.7659, + "loss_token": 1.6842, + "lr": "2.00e-07", + "norm": 5.5857, + "step": 777 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6804, + "epoch": 0.01, + "loss_char": 0.7307, + "loss_token": 1.6822, + "lr": "2.00e-07", + "norm": 5.941, + "step": 778 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6777, + "epoch": 0.01, + "loss_char": 0.7417, + "loss_token": 1.6323, + "lr": "2.00e-07", + "norm": 5.6713, + "step": 779 + }, + { + "acc_char": 0.2949, + "acc_token": 0.6748, + "epoch": 0.01, + "loss_char": 0.7385, + "loss_token": 1.7698, + "lr": "2.00e-07", + "norm": 5.7486, + "step": 780 + }, + { + "acc_char": 0.2982, + "acc_token": 0.6756, + "epoch": 0.01, + "loss_char": 0.7716, + "loss_token": 1.8033, + "lr": "2.00e-07", + "norm": 5.9043, + "step": 781 + }, + { + "acc_char": 0.3044, + "acc_token": 0.7008, + "epoch": 0.01, + "loss_char": 0.6561, + "loss_token": 1.5758, + "lr": "2.00e-07", + "norm": 5.4415, + "step": 782 + }, + { + "acc_char": 0.2863, + "acc_token": 0.6593, + "epoch": 0.01, + "loss_char": 0.7863, + "loss_token": 1.7233, + "lr": "2.00e-07", + "norm": 5.7104, + "step": 783 + }, + { + "acc_char": 0.3049, + "acc_token": 0.6844, + "epoch": 0.01, + "loss_char": 0.7141, + "loss_token": 1.6678, + "lr": "2.00e-07", + "norm": 6.1641, + "step": 784 + }, + { + "acc_char": 0.297, + "acc_token": 0.681, + "epoch": 0.01, + "loss_char": 0.7087, + "loss_token": 1.6866, + "lr": "2.00e-07", + "norm": 5.9054, + "step": 785 + }, + { + "acc_char": 0.3028, + "acc_token": 0.686, + "epoch": 0.01, + "loss_char": 0.7252, + "loss_token": 1.6103, + "lr": "2.00e-07", + "norm": 5.7014, + "step": 786 + }, + { + "acc_char": 0.3063, + "acc_token": 0.6935, + "epoch": 0.01, + "loss_char": 0.6981, + "loss_token": 1.6534, + "lr": "2.00e-07", + "norm": 5.6859, + "step": 787 + }, + { + "acc_char": 0.2767, + "acc_token": 0.6098, + "epoch": 0.01, + "loss_char": 0.8904, + "loss_token": 1.7864, + "lr": "2.00e-07", + "norm": 5.5673, + "step": 788 + }, + { + "acc_char": 0.3095, + "acc_token": 0.6966, + "epoch": 0.01, + "loss_char": 0.6927, + "loss_token": 1.5377, + "lr": "2.00e-07", + "norm": 5.614, + "step": 789 + }, + { + "acc_char": 0.3046, + "acc_token": 0.6851, + "epoch": 0.01, + "loss_char": 0.7454, + "loss_token": 1.601, + "lr": "2.00e-07", + "norm": 5.6357, + "step": 790 + }, + { + "acc_char": 0.3011, + "acc_token": 0.6869, + "epoch": 0.01, + "loss_char": 0.7022, + "loss_token": 1.6659, + "lr": "2.00e-07", + "norm": 5.6749, + "step": 791 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6817, + "epoch": 0.01, + "loss_char": 0.7403, + "loss_token": 1.6017, + "lr": "2.00e-07", + "norm": 5.5231, + "step": 792 + }, + { + "acc_char": 0.3026, + "acc_token": 0.6878, + "epoch": 0.01, + "loss_char": 0.701, + "loss_token": 1.5683, + "lr": "2.00e-07", + "norm": 6.018, + "step": 793 + }, + { + "acc_char": 0.3143, + "acc_token": 0.6957, + "epoch": 0.01, + "loss_char": 0.6879, + "loss_token": 1.6818, + "lr": "2.00e-07", + "norm": 6.0463, + "step": 794 + }, + { + "acc_char": 0.3123, + "acc_token": 0.7032, + "epoch": 0.01, + "loss_char": 0.6796, + "loss_token": 1.559, + "lr": "2.00e-07", + "norm": 5.3763, + "step": 795 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6721, + "epoch": 0.01, + "loss_char": 0.7448, + "loss_token": 1.6649, + "lr": "2.00e-07", + "norm": 5.6504, + "step": 796 + }, + { + "acc_char": 0.3108, + "acc_token": 0.6951, + "epoch": 0.01, + "loss_char": 0.71, + "loss_token": 1.5358, + "lr": "2.00e-07", + "norm": 5.5775, + "step": 797 + }, + { + "acc_char": 0.2889, + "acc_token": 0.6635, + "epoch": 0.01, + "loss_char": 0.7764, + "loss_token": 1.6694, + "lr": "2.00e-07", + "norm": 5.8663, + "step": 798 + }, + { + "acc_char": 0.3095, + "acc_token": 0.7013, + "epoch": 0.01, + "loss_char": 0.6619, + "loss_token": 1.5684, + "lr": "2.00e-07", + "norm": 5.5381, + "step": 799 + }, + { + "acc_char": 0.3073, + "acc_token": 0.6848, + "epoch": 0.01, + "loss_char": 0.7427, + "loss_token": 1.6565, + "lr": "2.00e-07", + "norm": 5.7262, + "step": 800 + }, + { + "acc_char": 0.2876, + "acc_token": 0.6455, + "epoch": 0.01, + "loss_char": 0.8496, + "loss_token": 1.76, + "lr": "2.00e-07", + "norm": 5.9846, + "step": 801 + }, + { + "acc_char": 0.2866, + "acc_token": 0.6213, + "epoch": 0.01, + "loss_char": 0.8847, + "loss_token": 1.7219, + "lr": "2.00e-07", + "norm": 5.528, + "step": 802 + }, + { + "acc_char": 0.294, + "acc_token": 0.6332, + "epoch": 0.01, + "loss_char": 0.8649, + "loss_token": 1.768, + "lr": "2.00e-07", + "norm": 5.6615, + "step": 803 + }, + { + "acc_char": 0.2963, + "acc_token": 0.6698, + "epoch": 0.01, + "loss_char": 0.7597, + "loss_token": 1.689, + "lr": "2.00e-07", + "norm": 5.7607, + "step": 804 + }, + { + "acc_char": 0.298, + "acc_token": 0.6756, + "epoch": 0.01, + "loss_char": 0.7265, + "loss_token": 1.6196, + "lr": "2.00e-07", + "norm": 5.5886, + "step": 805 + }, + { + "acc_char": 0.2965, + "acc_token": 0.6738, + "epoch": 0.01, + "loss_char": 0.7418, + "loss_token": 1.6831, + "lr": "2.00e-07", + "norm": 5.6464, + "step": 806 + }, + { + "acc_char": 0.3002, + "acc_token": 0.6889, + "epoch": 0.01, + "loss_char": 0.6854, + "loss_token": 1.6537, + "lr": "2.00e-07", + "norm": 5.7818, + "step": 807 + }, + { + "acc_char": 0.2867, + "acc_token": 0.6546, + "epoch": 0.01, + "loss_char": 0.8185, + "loss_token": 1.7508, + "lr": "2.00e-07", + "norm": 6.1051, + "step": 808 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6727, + "epoch": 0.01, + "loss_char": 0.768, + "loss_token": 1.6273, + "lr": "2.00e-07", + "norm": 5.7804, + "step": 809 + }, + { + "acc_char": 0.2798, + "acc_token": 0.6473, + "epoch": 0.01, + "loss_char": 0.8247, + "loss_token": 1.7481, + "lr": "2.00e-07", + "norm": 5.932, + "step": 810 + }, + { + "acc_char": 0.2984, + "acc_token": 0.6759, + "epoch": 0.01, + "loss_char": 0.753, + "loss_token": 1.6339, + "lr": "2.00e-07", + "norm": 5.5975, + "step": 811 + }, + { + "acc_char": 0.3008, + "acc_token": 0.6724, + "epoch": 0.01, + "loss_char": 0.7605, + "loss_token": 1.6948, + "lr": "2.00e-07", + "norm": 5.8128, + "step": 812 + }, + { + "acc_char": 0.3144, + "acc_token": 0.7123, + "epoch": 0.01, + "loss_char": 0.6638, + "loss_token": 1.5634, + "lr": "2.00e-07", + "norm": 5.3602, + "step": 813 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6854, + "epoch": 0.01, + "loss_char": 0.7074, + "loss_token": 1.5532, + "lr": "2.00e-07", + "norm": 5.7765, + "step": 814 + }, + { + "acc_char": 0.3047, + "acc_token": 0.6809, + "epoch": 0.01, + "loss_char": 0.7457, + "loss_token": 1.5699, + "lr": "2.00e-07", + "norm": 5.5826, + "step": 815 + }, + { + "acc_char": 0.3005, + "acc_token": 0.6828, + "epoch": 0.01, + "loss_char": 0.7312, + "loss_token": 1.559, + "lr": "2.00e-07", + "norm": 5.6165, + "step": 816 + }, + { + "acc_char": 0.332, + "acc_token": 0.7324, + "epoch": 0.01, + "loss_char": 0.6057, + "loss_token": 1.516, + "lr": "2.00e-07", + "norm": 5.4287, + "step": 817 + }, + { + "acc_char": 0.2929, + "acc_token": 0.6608, + "epoch": 0.01, + "loss_char": 0.779, + "loss_token": 1.7271, + "lr": "2.00e-07", + "norm": 5.6373, + "step": 818 + }, + { + "acc_char": 0.3008, + "acc_token": 0.6771, + "epoch": 0.01, + "loss_char": 0.7156, + "loss_token": 1.6334, + "lr": "2.00e-07", + "norm": 5.5915, + "step": 819 + }, + { + "acc_char": 0.307, + "acc_token": 0.6834, + "epoch": 0.02, + "loss_char": 0.7347, + "loss_token": 1.6243, + "lr": "2.00e-07", + "norm": 5.7642, + "step": 820 + }, + { + "acc_char": 0.3159, + "acc_token": 0.7048, + "epoch": 0.02, + "loss_char": 0.681, + "loss_token": 1.5652, + "lr": "2.00e-07", + "norm": 5.6864, + "step": 821 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6917, + "epoch": 0.02, + "loss_char": 0.6971, + "loss_token": 1.5949, + "lr": "2.00e-07", + "norm": 5.6179, + "step": 822 + }, + { + "acc_char": 0.2979, + "acc_token": 0.683, + "epoch": 0.02, + "loss_char": 0.7057, + "loss_token": 1.6006, + "lr": "2.00e-07", + "norm": 5.9728, + "step": 823 + }, + { + "acc_char": 0.309, + "acc_token": 0.6976, + "epoch": 0.02, + "loss_char": 0.6793, + "loss_token": 1.5622, + "lr": "2.00e-07", + "norm": 5.7353, + "step": 824 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6753, + "epoch": 0.02, + "loss_char": 0.7406, + "loss_token": 1.6784, + "lr": "2.00e-07", + "norm": 5.7657, + "step": 825 + }, + { + "acc_char": 0.2928, + "acc_token": 0.6691, + "epoch": 0.02, + "loss_char": 0.7477, + "loss_token": 1.6609, + "lr": "2.00e-07", + "norm": 5.9937, + "step": 826 + }, + { + "acc_char": 0.3049, + "acc_token": 0.6844, + "epoch": 0.02, + "loss_char": 0.7368, + "loss_token": 1.6281, + "lr": "2.00e-07", + "norm": 5.7378, + "step": 827 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6795, + "epoch": 0.02, + "loss_char": 0.718, + "loss_token": 1.5856, + "lr": "2.00e-07", + "norm": 5.6599, + "step": 828 + }, + { + "acc_char": 0.3048, + "acc_token": 0.7048, + "epoch": 0.02, + "loss_char": 0.6446, + "loss_token": 1.5444, + "lr": "2.00e-07", + "norm": 5.5, + "step": 829 + }, + { + "acc_char": 0.3019, + "acc_token": 0.6809, + "epoch": 0.02, + "loss_char": 0.7433, + "loss_token": 1.636, + "lr": "2.00e-07", + "norm": 5.4755, + "step": 830 + }, + { + "acc_char": 0.3, + "acc_token": 0.6857, + "epoch": 0.02, + "loss_char": 0.6815, + "loss_token": 1.5521, + "lr": "2.00e-07", + "norm": 5.5819, + "step": 831 + }, + { + "acc_char": 0.2923, + "acc_token": 0.6756, + "epoch": 0.02, + "loss_char": 0.7457, + "loss_token": 1.6655, + "lr": "2.00e-07", + "norm": 6.0816, + "step": 832 + }, + { + "acc_char": 0.2992, + "acc_token": 0.6766, + "epoch": 0.02, + "loss_char": 0.7569, + "loss_token": 1.6527, + "lr": "2.00e-07", + "norm": 5.7566, + "step": 833 + }, + { + "acc_char": 0.3021, + "acc_token": 0.6779, + "epoch": 0.02, + "loss_char": 0.7349, + "loss_token": 1.6231, + "lr": "2.00e-07", + "norm": 5.5129, + "step": 834 + }, + { + "acc_char": 0.3048, + "acc_token": 0.6798, + "epoch": 0.02, + "loss_char": 0.729, + "loss_token": 1.5387, + "lr": "2.00e-07", + "norm": 5.4065, + "step": 835 + }, + { + "acc_char": 0.2927, + "acc_token": 0.6659, + "epoch": 0.02, + "loss_char": 0.7933, + "loss_token": 1.6665, + "lr": "2.00e-07", + "norm": 5.7373, + "step": 836 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6878, + "epoch": 0.02, + "loss_char": 0.7152, + "loss_token": 1.5988, + "lr": "2.00e-07", + "norm": 6.0286, + "step": 837 + }, + { + "acc_char": 0.3144, + "acc_token": 0.6894, + "epoch": 0.02, + "loss_char": 0.7078, + "loss_token": 1.6339, + "lr": "2.00e-07", + "norm": 5.8277, + "step": 838 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6712, + "epoch": 0.02, + "loss_char": 0.7581, + "loss_token": 1.6225, + "lr": "2.00e-07", + "norm": 5.7859, + "step": 839 + }, + { + "acc_char": 0.3126, + "acc_token": 0.6887, + "epoch": 0.02, + "loss_char": 0.7221, + "loss_token": 1.5797, + "lr": "2.00e-07", + "norm": 5.7404, + "step": 840 + }, + { + "acc_char": 0.3171, + "acc_token": 0.7202, + "epoch": 0.02, + "loss_char": 0.5949, + "loss_token": 1.5088, + "lr": "2.00e-07", + "norm": 5.521, + "step": 841 + }, + { + "acc_char": 0.2944, + "acc_token": 0.6671, + "epoch": 0.02, + "loss_char": 0.7784, + "loss_token": 1.5698, + "lr": "2.00e-07", + "norm": 5.5683, + "step": 842 + }, + { + "acc_char": 0.3136, + "acc_token": 0.6978, + "epoch": 0.02, + "loss_char": 0.6885, + "loss_token": 1.539, + "lr": "2.00e-07", + "norm": 5.6271, + "step": 843 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6963, + "epoch": 0.02, + "loss_char": 0.6713, + "loss_token": 1.5745, + "lr": "2.00e-07", + "norm": 5.531, + "step": 844 + }, + { + "acc_char": 0.3143, + "acc_token": 0.7153, + "epoch": 0.02, + "loss_char": 0.6429, + "loss_token": 1.5588, + "lr": "2.00e-07", + "norm": 5.8076, + "step": 845 + }, + { + "acc_char": 0.3135, + "acc_token": 0.7175, + "epoch": 0.02, + "loss_char": 0.6098, + "loss_token": 1.5226, + "lr": "2.00e-07", + "norm": 5.5064, + "step": 846 + }, + { + "acc_char": 0.3018, + "acc_token": 0.6906, + "epoch": 0.02, + "loss_char": 0.6871, + "loss_token": 1.6122, + "lr": "2.00e-07", + "norm": 5.6541, + "step": 847 + }, + { + "acc_char": 0.312, + "acc_token": 0.6916, + "epoch": 0.02, + "loss_char": 0.7166, + "loss_token": 1.5057, + "lr": "2.00e-07", + "norm": 5.3657, + "step": 848 + }, + { + "acc_char": 0.3038, + "acc_token": 0.6949, + "epoch": 0.02, + "loss_char": 0.6808, + "loss_token": 1.5474, + "lr": "2.00e-07", + "norm": 5.3944, + "step": 849 + }, + { + "acc_char": 0.2928, + "acc_token": 0.6665, + "epoch": 0.02, + "loss_char": 0.7741, + "loss_token": 1.7224, + "lr": "2.00e-07", + "norm": 6.0661, + "step": 850 + }, + { + "acc_char": 0.3188, + "acc_token": 0.7099, + "epoch": 0.02, + "loss_char": 0.6563, + "loss_token": 1.5444, + "lr": "2.00e-07", + "norm": 5.7398, + "step": 851 + }, + { + "acc_char": 0.3117, + "acc_token": 0.7085, + "epoch": 0.02, + "loss_char": 0.6725, + "loss_token": 1.6115, + "lr": "2.00e-07", + "norm": 6.1062, + "step": 852 + }, + { + "acc_char": 0.2951, + "acc_token": 0.6786, + "epoch": 0.02, + "loss_char": 0.7135, + "loss_token": 1.586, + "lr": "2.00e-07", + "norm": 5.7301, + "step": 853 + }, + { + "acc_char": 0.3145, + "acc_token": 0.7019, + "epoch": 0.02, + "loss_char": 0.668, + "loss_token": 1.5912, + "lr": "2.00e-07", + "norm": 5.4874, + "step": 854 + }, + { + "acc_char": 0.3146, + "acc_token": 0.7107, + "epoch": 0.02, + "loss_char": 0.6492, + "loss_token": 1.5739, + "lr": "2.00e-07", + "norm": 5.5151, + "step": 855 + }, + { + "acc_char": 0.3019, + "acc_token": 0.6879, + "epoch": 0.02, + "loss_char": 0.7229, + "loss_token": 1.5687, + "lr": "2.00e-07", + "norm": 5.5906, + "step": 856 + }, + { + "acc_char": 0.2992, + "acc_token": 0.6701, + "epoch": 0.02, + "loss_char": 0.7858, + "loss_token": 1.6602, + "lr": "2.00e-07", + "norm": 6.0501, + "step": 857 + }, + { + "acc_char": 0.2985, + "acc_token": 0.6726, + "epoch": 0.02, + "loss_char": 0.7506, + "loss_token": 1.5568, + "lr": "2.00e-07", + "norm": 5.7594, + "step": 858 + }, + { + "acc_char": 0.283, + "acc_token": 0.651, + "epoch": 0.02, + "loss_char": 0.8111, + "loss_token": 1.7399, + "lr": "2.00e-07", + "norm": 5.8941, + "step": 859 + }, + { + "acc_char": 0.297, + "acc_token": 0.6891, + "epoch": 0.02, + "loss_char": 0.6818, + "loss_token": 1.5738, + "lr": "2.00e-07", + "norm": 5.8817, + "step": 860 + }, + { + "acc_char": 0.3067, + "acc_token": 0.6701, + "epoch": 0.02, + "loss_char": 0.7807, + "loss_token": 1.6976, + "lr": "2.00e-07", + "norm": 5.9143, + "step": 861 + }, + { + "acc_char": 0.3062, + "acc_token": 0.7005, + "epoch": 0.02, + "loss_char": 0.6652, + "loss_token": 1.5703, + "lr": "2.00e-07", + "norm": 5.8149, + "step": 862 + }, + { + "acc_char": 0.3088, + "acc_token": 0.7021, + "epoch": 0.02, + "loss_char": 0.6614, + "loss_token": 1.5577, + "lr": "2.00e-07", + "norm": 5.619, + "step": 863 + }, + { + "acc_char": 0.31, + "acc_token": 0.6976, + "epoch": 0.02, + "loss_char": 0.6798, + "loss_token": 1.5745, + "lr": "2.00e-07", + "norm": 5.7408, + "step": 864 + }, + { + "acc_char": 0.3075, + "acc_token": 0.6924, + "epoch": 0.02, + "loss_char": 0.7154, + "loss_token": 1.5676, + "lr": "2.00e-07", + "norm": 5.6581, + "step": 865 + }, + { + "acc_char": 0.3092, + "acc_token": 0.709, + "epoch": 0.02, + "loss_char": 0.6473, + "loss_token": 1.4729, + "lr": "2.00e-07", + "norm": 5.2921, + "step": 866 + }, + { + "acc_char": 0.2904, + "acc_token": 0.6714, + "epoch": 0.02, + "loss_char": 0.7233, + "loss_token": 1.6225, + "lr": "2.00e-07", + "norm": 5.5076, + "step": 867 + }, + { + "acc_char": 0.3063, + "acc_token": 0.6996, + "epoch": 0.02, + "loss_char": 0.6696, + "loss_token": 1.6228, + "lr": "2.00e-07", + "norm": 5.6327, + "step": 868 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6821, + "epoch": 0.02, + "loss_char": 0.7096, + "loss_token": 1.6779, + "lr": "2.00e-07", + "norm": 5.7236, + "step": 869 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6916, + "epoch": 0.02, + "loss_char": 0.6837, + "loss_token": 1.6087, + "lr": "2.00e-07", + "norm": 5.6206, + "step": 870 + }, + { + "acc_char": 0.3074, + "acc_token": 0.6979, + "epoch": 0.02, + "loss_char": 0.663, + "loss_token": 1.5133, + "lr": "2.00e-07", + "norm": 5.5679, + "step": 871 + }, + { + "acc_char": 0.2908, + "acc_token": 0.6608, + "epoch": 0.02, + "loss_char": 0.7855, + "loss_token": 1.6526, + "lr": "2.00e-07", + "norm": 5.7404, + "step": 872 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6746, + "epoch": 0.02, + "loss_char": 0.7258, + "loss_token": 1.6516, + "lr": "2.00e-07", + "norm": 5.6479, + "step": 873 + }, + { + "acc_char": 0.305, + "acc_token": 0.6764, + "epoch": 0.02, + "loss_char": 0.7713, + "loss_token": 1.6556, + "lr": "2.00e-07", + "norm": 5.7678, + "step": 874 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6732, + "epoch": 0.02, + "loss_char": 0.7498, + "loss_token": 1.737, + "lr": "2.00e-07", + "norm": 5.9202, + "step": 875 + }, + { + "acc_char": 0.3012, + "acc_token": 0.6684, + "epoch": 0.02, + "loss_char": 0.785, + "loss_token": 1.6429, + "lr": "2.00e-07", + "norm": 5.6248, + "step": 876 + }, + { + "acc_char": 0.3156, + "acc_token": 0.7224, + "epoch": 0.02, + "loss_char": 0.6204, + "loss_token": 1.5703, + "lr": "2.00e-07", + "norm": 5.5374, + "step": 877 + }, + { + "acc_char": 0.2873, + "acc_token": 0.6655, + "epoch": 0.02, + "loss_char": 0.7559, + "loss_token": 1.707, + "lr": "2.00e-07", + "norm": 5.889, + "step": 878 + }, + { + "acc_char": 0.2936, + "acc_token": 0.6815, + "epoch": 0.02, + "loss_char": 0.7047, + "loss_token": 1.621, + "lr": "2.00e-07", + "norm": 5.7028, + "step": 879 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6811, + "epoch": 0.02, + "loss_char": 0.7095, + "loss_token": 1.5844, + "lr": "2.00e-07", + "norm": 5.574, + "step": 880 + }, + { + "acc_char": 0.3103, + "acc_token": 0.6643, + "epoch": 0.02, + "loss_char": 0.8205, + "loss_token": 1.7376, + "lr": "2.00e-07", + "norm": 5.7651, + "step": 881 + }, + { + "acc_char": 0.2889, + "acc_token": 0.6652, + "epoch": 0.02, + "loss_char": 0.7952, + "loss_token": 1.7347, + "lr": "2.00e-07", + "norm": 11.8228, + "step": 882 + }, + { + "acc_char": 0.2937, + "acc_token": 0.664, + "epoch": 0.02, + "loss_char": 0.7846, + "loss_token": 1.6037, + "lr": "2.00e-07", + "norm": 5.556, + "step": 883 + }, + { + "acc_char": 0.3034, + "acc_token": 0.6775, + "epoch": 0.02, + "loss_char": 0.7591, + "loss_token": 1.6535, + "lr": "2.00e-07", + "norm": 5.5967, + "step": 884 + }, + { + "acc_char": 0.3091, + "acc_token": 0.6898, + "epoch": 0.02, + "loss_char": 0.7304, + "loss_token": 1.594, + "lr": "2.00e-07", + "norm": 5.4046, + "step": 885 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6882, + "epoch": 0.02, + "loss_char": 0.6872, + "loss_token": 1.6064, + "lr": "2.00e-07", + "norm": 5.8358, + "step": 886 + }, + { + "acc_char": 0.3042, + "acc_token": 0.6817, + "epoch": 0.02, + "loss_char": 0.7236, + "loss_token": 1.7433, + "lr": "2.00e-07", + "norm": 5.7178, + "step": 887 + }, + { + "acc_char": 0.2988, + "acc_token": 0.6754, + "epoch": 0.02, + "loss_char": 0.729, + "loss_token": 1.6565, + "lr": "2.00e-07", + "norm": 5.9591, + "step": 888 + }, + { + "acc_char": 0.2986, + "acc_token": 0.6801, + "epoch": 0.02, + "loss_char": 0.7253, + "loss_token": 1.6613, + "lr": "2.00e-07", + "norm": 5.7309, + "step": 889 + }, + { + "acc_char": 0.295, + "acc_token": 0.6737, + "epoch": 0.02, + "loss_char": 0.7399, + "loss_token": 1.6015, + "lr": "2.00e-07", + "norm": 5.4777, + "step": 890 + }, + { + "acc_char": 0.2896, + "acc_token": 0.666, + "epoch": 0.02, + "loss_char": 0.7702, + "loss_token": 1.6573, + "lr": "2.00e-07", + "norm": 6.1023, + "step": 891 + }, + { + "acc_char": 0.2845, + "acc_token": 0.6454, + "epoch": 0.02, + "loss_char": 0.8636, + "loss_token": 1.7583, + "lr": "2.00e-07", + "norm": 5.9048, + "step": 892 + }, + { + "acc_char": 0.2889, + "acc_token": 0.6415, + "epoch": 0.02, + "loss_char": 0.8748, + "loss_token": 1.7602, + "lr": "2.00e-07", + "norm": 5.9197, + "step": 893 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6774, + "epoch": 0.02, + "loss_char": 0.7214, + "loss_token": 1.6364, + "lr": "2.00e-07", + "norm": 5.4568, + "step": 894 + }, + { + "acc_char": 0.3245, + "acc_token": 0.7212, + "epoch": 0.02, + "loss_char": 0.6279, + "loss_token": 1.466, + "lr": "2.00e-07", + "norm": 5.5303, + "step": 895 + }, + { + "acc_char": 0.3241, + "acc_token": 0.7322, + "epoch": 0.02, + "loss_char": 0.5878, + "loss_token": 1.4649, + "lr": "2.00e-07", + "norm": 5.4387, + "step": 896 + }, + { + "acc_char": 0.3078, + "acc_token": 0.6898, + "epoch": 0.02, + "loss_char": 0.7139, + "loss_token": 1.5937, + "lr": "2.00e-07", + "norm": 5.5562, + "step": 897 + }, + { + "acc_char": 0.2982, + "acc_token": 0.6713, + "epoch": 0.02, + "loss_char": 0.7894, + "loss_token": 1.685, + "lr": "2.00e-07", + "norm": 5.6968, + "step": 898 + }, + { + "acc_char": 0.2958, + "acc_token": 0.6649, + "epoch": 0.02, + "loss_char": 0.7826, + "loss_token": 1.7361, + "lr": "2.00e-07", + "norm": 6.044, + "step": 899 + }, + { + "acc_char": 0.3078, + "acc_token": 0.6906, + "epoch": 0.02, + "loss_char": 0.7139, + "loss_token": 1.5897, + "lr": "2.00e-07", + "norm": 5.6098, + "step": 900 + }, + { + "acc_char": 0.2989, + "acc_token": 0.6848, + "epoch": 0.02, + "loss_char": 0.7267, + "loss_token": 1.6437, + "lr": "2.00e-07", + "norm": 6.034, + "step": 901 + }, + { + "acc_char": 0.3091, + "acc_token": 0.6975, + "epoch": 0.02, + "loss_char": 0.7161, + "loss_token": 1.5962, + "lr": "2.00e-07", + "norm": 5.8044, + "step": 902 + }, + { + "acc_char": 0.308, + "acc_token": 0.6939, + "epoch": 0.02, + "loss_char": 0.6884, + "loss_token": 1.585, + "lr": "2.00e-07", + "norm": 5.7306, + "step": 903 + }, + { + "acc_char": 0.3086, + "acc_token": 0.6925, + "epoch": 0.02, + "loss_char": 0.7179, + "loss_token": 1.6073, + "lr": "2.00e-07", + "norm": 5.7369, + "step": 904 + }, + { + "acc_char": 0.2908, + "acc_token": 0.6717, + "epoch": 0.02, + "loss_char": 0.7543, + "loss_token": 1.6996, + "lr": "2.00e-07", + "norm": 5.7256, + "step": 905 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6958, + "epoch": 0.02, + "loss_char": 0.6856, + "loss_token": 1.585, + "lr": "2.00e-07", + "norm": 5.6225, + "step": 906 + }, + { + "acc_char": 0.3088, + "acc_token": 0.6996, + "epoch": 0.02, + "loss_char": 0.6747, + "loss_token": 1.5768, + "lr": "2.00e-07", + "norm": 5.7417, + "step": 907 + }, + { + "acc_char": 0.3177, + "acc_token": 0.7195, + "epoch": 0.02, + "loss_char": 0.6313, + "loss_token": 1.5231, + "lr": "2.00e-07", + "norm": 5.7196, + "step": 908 + }, + { + "acc_char": 0.3083, + "acc_token": 0.6958, + "epoch": 0.02, + "loss_char": 0.6694, + "loss_token": 1.635, + "lr": "2.00e-07", + "norm": 6.0173, + "step": 909 + }, + { + "acc_char": 0.2915, + "acc_token": 0.6698, + "epoch": 0.02, + "loss_char": 0.7286, + "loss_token": 1.6788, + "lr": "2.00e-07", + "norm": 5.6674, + "step": 910 + }, + { + "acc_char": 0.3072, + "acc_token": 0.689, + "epoch": 0.02, + "loss_char": 0.7164, + "loss_token": 1.6722, + "lr": "2.00e-07", + "norm": 5.83, + "step": 911 + }, + { + "acc_char": 0.2934, + "acc_token": 0.6323, + "epoch": 0.02, + "loss_char": 0.8456, + "loss_token": 1.7197, + "lr": "2.00e-07", + "norm": 5.5935, + "step": 912 + }, + { + "acc_char": 0.297, + "acc_token": 0.679, + "epoch": 0.02, + "loss_char": 0.728, + "loss_token": 1.6382, + "lr": "2.00e-07", + "norm": 5.7187, + "step": 913 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6892, + "epoch": 0.02, + "loss_char": 0.7052, + "loss_token": 1.5839, + "lr": "2.00e-07", + "norm": 5.7948, + "step": 914 + }, + { + "acc_char": 0.3211, + "acc_token": 0.7173, + "epoch": 0.02, + "loss_char": 0.6406, + "loss_token": 1.5502, + "lr": "2.00e-07", + "norm": 5.5802, + "step": 915 + }, + { + "acc_char": 0.2919, + "acc_token": 0.6749, + "epoch": 0.02, + "loss_char": 0.7306, + "loss_token": 1.6292, + "lr": "2.00e-07", + "norm": 5.6494, + "step": 916 + }, + { + "acc_char": 0.2947, + "acc_token": 0.6714, + "epoch": 0.02, + "loss_char": 0.7284, + "loss_token": 1.692, + "lr": "2.00e-07", + "norm": 5.5497, + "step": 917 + }, + { + "acc_char": 0.3015, + "acc_token": 0.6835, + "epoch": 0.02, + "loss_char": 0.7163, + "loss_token": 1.5713, + "lr": "2.00e-07", + "norm": 5.8363, + "step": 918 + }, + { + "acc_char": 0.3016, + "acc_token": 0.6719, + "epoch": 0.02, + "loss_char": 0.7656, + "loss_token": 1.7156, + "lr": "2.00e-07", + "norm": 5.9585, + "step": 919 + }, + { + "acc_char": 0.2974, + "acc_token": 0.686, + "epoch": 0.02, + "loss_char": 0.706, + "loss_token": 1.6797, + "lr": "2.00e-07", + "norm": 5.858, + "step": 920 + }, + { + "acc_char": 0.3023, + "acc_token": 0.6833, + "epoch": 0.02, + "loss_char": 0.737, + "loss_token": 1.6113, + "lr": "2.00e-07", + "norm": 5.7094, + "step": 921 + }, + { + "acc_char": 0.2893, + "acc_token": 0.6675, + "epoch": 0.02, + "loss_char": 0.7461, + "loss_token": 1.6808, + "lr": "2.00e-07", + "norm": 5.8515, + "step": 922 + }, + { + "acc_char": 0.2925, + "acc_token": 0.634, + "epoch": 0.02, + "loss_char": 0.8618, + "loss_token": 1.7723, + "lr": "2.00e-07", + "norm": 5.5071, + "step": 923 + }, + { + "acc_char": 0.3084, + "acc_token": 0.7052, + "epoch": 0.02, + "loss_char": 0.6395, + "loss_token": 1.6118, + "lr": "2.00e-07", + "norm": 6.0085, + "step": 924 + }, + { + "acc_char": 0.3117, + "acc_token": 0.6981, + "epoch": 0.02, + "loss_char": 0.686, + "loss_token": 1.5, + "lr": "2.00e-07", + "norm": 6.0124, + "step": 925 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6853, + "epoch": 0.02, + "loss_char": 0.7184, + "loss_token": 1.6244, + "lr": "2.00e-07", + "norm": 5.9832, + "step": 926 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6876, + "epoch": 0.02, + "loss_char": 0.6786, + "loss_token": 1.6284, + "lr": "2.00e-07", + "norm": 5.725, + "step": 927 + }, + { + "acc_char": 0.3066, + "acc_token": 0.7078, + "epoch": 0.02, + "loss_char": 0.6441, + "loss_token": 1.4689, + "lr": "2.00e-07", + "norm": 5.4178, + "step": 928 + }, + { + "acc_char": 0.3015, + "acc_token": 0.6397, + "epoch": 0.02, + "loss_char": 0.8701, + "loss_token": 1.7746, + "lr": "2.00e-07", + "norm": 5.6169, + "step": 929 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6762, + "epoch": 0.02, + "loss_char": 0.7557, + "loss_token": 1.6987, + "lr": "2.00e-07", + "norm": 5.8867, + "step": 930 + }, + { + "acc_char": 0.2994, + "acc_token": 0.672, + "epoch": 0.02, + "loss_char": 0.7681, + "loss_token": 1.7252, + "lr": "2.00e-07", + "norm": 5.9723, + "step": 931 + }, + { + "acc_char": 0.3008, + "acc_token": 0.677, + "epoch": 0.02, + "loss_char": 0.7565, + "loss_token": 1.706, + "lr": "2.00e-07", + "norm": 5.9506, + "step": 932 + }, + { + "acc_char": 0.3066, + "acc_token": 0.6942, + "epoch": 0.02, + "loss_char": 0.6873, + "loss_token": 1.5338, + "lr": "2.00e-07", + "norm": 5.9416, + "step": 933 + }, + { + "acc_char": 0.3081, + "acc_token": 0.7002, + "epoch": 0.02, + "loss_char": 0.6803, + "loss_token": 1.5462, + "lr": "2.00e-07", + "norm": 5.6716, + "step": 934 + }, + { + "acc_char": 0.3191, + "acc_token": 0.7165, + "epoch": 0.02, + "loss_char": 0.6339, + "loss_token": 1.493, + "lr": "2.00e-07", + "norm": 5.4216, + "step": 935 + }, + { + "acc_char": 0.2913, + "acc_token": 0.6643, + "epoch": 0.02, + "loss_char": 0.7805, + "loss_token": 1.7313, + "lr": "2.00e-07", + "norm": 5.9457, + "step": 936 + }, + { + "acc_char": 0.3171, + "acc_token": 0.7187, + "epoch": 0.02, + "loss_char": 0.6301, + "loss_token": 1.5637, + "lr": "2.00e-07", + "norm": 5.7545, + "step": 937 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6958, + "epoch": 0.02, + "loss_char": 0.6851, + "loss_token": 1.5865, + "lr": "2.00e-07", + "norm": 5.792, + "step": 938 + }, + { + "acc_char": 0.2989, + "acc_token": 0.6709, + "epoch": 0.02, + "loss_char": 0.7519, + "loss_token": 1.6043, + "lr": "2.00e-07", + "norm": 5.531, + "step": 939 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6756, + "epoch": 0.02, + "loss_char": 0.7521, + "loss_token": 1.6099, + "lr": "2.00e-07", + "norm": 5.4071, + "step": 940 + }, + { + "acc_char": 0.2772, + "acc_token": 0.6351, + "epoch": 0.02, + "loss_char": 0.8474, + "loss_token": 1.7509, + "lr": "2.00e-07", + "norm": 5.956, + "step": 941 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6867, + "epoch": 0.02, + "loss_char": 0.7244, + "loss_token": 1.66, + "lr": "2.00e-07", + "norm": 5.5928, + "step": 942 + }, + { + "acc_char": 0.2985, + "acc_token": 0.6779, + "epoch": 0.02, + "loss_char": 0.7319, + "loss_token": 1.7502, + "lr": "2.00e-07", + "norm": 5.6872, + "step": 943 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6762, + "epoch": 0.02, + "loss_char": 0.7591, + "loss_token": 1.6157, + "lr": "2.00e-07", + "norm": 5.5877, + "step": 944 + }, + { + "acc_char": 0.2953, + "acc_token": 0.6704, + "epoch": 0.02, + "loss_char": 0.7453, + "loss_token": 1.549, + "lr": "2.00e-07", + "norm": 5.4127, + "step": 945 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6776, + "epoch": 0.02, + "loss_char": 0.7399, + "loss_token": 1.5651, + "lr": "2.00e-07", + "norm": 5.3875, + "step": 946 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6851, + "epoch": 0.02, + "loss_char": 0.7664, + "loss_token": 1.6692, + "lr": "2.00e-07", + "norm": 6.3977, + "step": 947 + }, + { + "acc_char": 0.2952, + "acc_token": 0.6804, + "epoch": 0.02, + "loss_char": 0.7196, + "loss_token": 1.5708, + "lr": "2.00e-07", + "norm": 5.3926, + "step": 948 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6881, + "epoch": 0.02, + "loss_char": 0.7196, + "loss_token": 1.6602, + "lr": "2.00e-07", + "norm": 5.7608, + "step": 949 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6757, + "epoch": 0.02, + "loss_char": 0.7133, + "loss_token": 1.4978, + "lr": "2.00e-07", + "norm": 5.4931, + "step": 950 + }, + { + "acc_char": 0.2858, + "acc_token": 0.6277, + "epoch": 0.02, + "loss_char": 0.8623, + "loss_token": 1.6958, + "lr": "2.00e-07", + "norm": 5.3998, + "step": 951 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6793, + "epoch": 0.02, + "loss_char": 0.7242, + "loss_token": 1.5484, + "lr": "2.00e-07", + "norm": 5.8509, + "step": 952 + }, + { + "acc_char": 0.2958, + "acc_token": 0.6784, + "epoch": 0.02, + "loss_char": 0.7124, + "loss_token": 1.6386, + "lr": "2.00e-07", + "norm": 5.6745, + "step": 953 + }, + { + "acc_char": 0.3102, + "acc_token": 0.6879, + "epoch": 0.02, + "loss_char": 0.7138, + "loss_token": 1.5752, + "lr": "2.00e-07", + "norm": 5.6627, + "step": 954 + }, + { + "acc_char": 0.314, + "acc_token": 0.7037, + "epoch": 0.02, + "loss_char": 0.6819, + "loss_token": 1.6394, + "lr": "2.00e-07", + "norm": 5.8967, + "step": 955 + }, + { + "acc_char": 0.295, + "acc_token": 0.6637, + "epoch": 0.02, + "loss_char": 0.787, + "loss_token": 1.7269, + "lr": "2.00e-07", + "norm": 6.0271, + "step": 956 + }, + { + "acc_char": 0.2884, + "acc_token": 0.6678, + "epoch": 0.02, + "loss_char": 0.7588, + "loss_token": 1.6674, + "lr": "2.00e-07", + "norm": 5.669, + "step": 957 + }, + { + "acc_char": 0.2978, + "acc_token": 0.6848, + "epoch": 0.02, + "loss_char": 0.7159, + "loss_token": 1.567, + "lr": "2.00e-07", + "norm": 5.3287, + "step": 958 + }, + { + "acc_char": 0.294, + "acc_token": 0.6637, + "epoch": 0.02, + "loss_char": 0.7728, + "loss_token": 1.6319, + "lr": "2.00e-07", + "norm": 5.9871, + "step": 959 + }, + { + "acc_char": 0.2899, + "acc_token": 0.672, + "epoch": 0.02, + "loss_char": 0.7255, + "loss_token": 1.6181, + "lr": "2.00e-07", + "norm": 5.4521, + "step": 960 + }, + { + "acc_char": 0.3153, + "acc_token": 0.7082, + "epoch": 0.02, + "loss_char": 0.6676, + "loss_token": 1.6058, + "lr": "2.00e-07", + "norm": 6.0036, + "step": 961 + }, + { + "acc_char": 0.2989, + "acc_token": 0.6886, + "epoch": 0.02, + "loss_char": 0.6855, + "loss_token": 1.6063, + "lr": "2.00e-07", + "norm": 5.8428, + "step": 962 + }, + { + "acc_char": 0.2953, + "acc_token": 0.6828, + "epoch": 0.02, + "loss_char": 0.6914, + "loss_token": 1.6176, + "lr": "2.00e-07", + "norm": 5.704, + "step": 963 + }, + { + "acc_char": 0.2963, + "acc_token": 0.6692, + "epoch": 0.02, + "loss_char": 0.7661, + "loss_token": 1.6847, + "lr": "2.00e-07", + "norm": 5.8579, + "step": 964 + }, + { + "acc_char": 0.303, + "acc_token": 0.6859, + "epoch": 0.02, + "loss_char": 0.7015, + "loss_token": 1.5478, + "lr": "2.00e-07", + "norm": 6.1629, + "step": 965 + }, + { + "acc_char": 0.3015, + "acc_token": 0.6883, + "epoch": 0.02, + "loss_char": 0.7104, + "loss_token": 1.6628, + "lr": "2.00e-07", + "norm": 5.7657, + "step": 966 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6608, + "epoch": 0.02, + "loss_char": 0.7851, + "loss_token": 1.6136, + "lr": "2.00e-07", + "norm": 5.6574, + "step": 967 + }, + { + "acc_char": 0.2931, + "acc_token": 0.6716, + "epoch": 0.02, + "loss_char": 0.7285, + "loss_token": 1.5881, + "lr": "2.00e-07", + "norm": 5.9718, + "step": 968 + }, + { + "acc_char": 0.31, + "acc_token": 0.7031, + "epoch": 0.02, + "loss_char": 0.6727, + "loss_token": 1.5546, + "lr": "2.00e-07", + "norm": 5.628, + "step": 969 + }, + { + "acc_char": 0.308, + "acc_token": 0.6854, + "epoch": 0.02, + "loss_char": 0.7318, + "loss_token": 1.6251, + "lr": "2.00e-07", + "norm": 5.7469, + "step": 970 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6873, + "epoch": 0.02, + "loss_char": 0.7109, + "loss_token": 1.6241, + "lr": "2.00e-07", + "norm": 5.4993, + "step": 971 + }, + { + "acc_char": 0.2966, + "acc_token": 0.6401, + "epoch": 0.02, + "loss_char": 0.8435, + "loss_token": 1.7002, + "lr": "2.00e-07", + "norm": 5.4046, + "step": 972 + }, + { + "acc_char": 0.2896, + "acc_token": 0.6557, + "epoch": 0.02, + "loss_char": 0.7916, + "loss_token": 1.6553, + "lr": "2.00e-07", + "norm": 5.9539, + "step": 973 + }, + { + "acc_char": 0.3168, + "acc_token": 0.7225, + "epoch": 0.02, + "loss_char": 0.6251, + "loss_token": 1.5137, + "lr": "2.00e-07", + "norm": 5.4914, + "step": 974 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6696, + "epoch": 0.02, + "loss_char": 0.7631, + "loss_token": 1.597, + "lr": "2.00e-07", + "norm": 5.8429, + "step": 975 + }, + { + "acc_char": 0.3112, + "acc_token": 0.7, + "epoch": 0.02, + "loss_char": 0.6922, + "loss_token": 1.5982, + "lr": "2.00e-07", + "norm": 5.5825, + "step": 976 + }, + { + "acc_char": 0.2958, + "acc_token": 0.6691, + "epoch": 0.02, + "loss_char": 0.7646, + "loss_token": 1.6608, + "lr": "2.00e-07", + "norm": 5.9167, + "step": 977 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6676, + "epoch": 0.02, + "loss_char": 0.7549, + "loss_token": 1.6464, + "lr": "2.00e-07", + "norm": 5.6152, + "step": 978 + }, + { + "acc_char": 0.2952, + "acc_token": 0.6787, + "epoch": 0.02, + "loss_char": 0.7135, + "loss_token": 1.5947, + "lr": "2.00e-07", + "norm": 5.8565, + "step": 979 + }, + { + "acc_char": 0.2982, + "acc_token": 0.6622, + "epoch": 0.02, + "loss_char": 0.7917, + "loss_token": 1.61, + "lr": "2.00e-07", + "norm": 5.9463, + "step": 980 + }, + { + "acc_char": 0.3189, + "acc_token": 0.708, + "epoch": 0.02, + "loss_char": 0.6644, + "loss_token": 1.5372, + "lr": "2.00e-07", + "norm": 5.4684, + "step": 981 + }, + { + "acc_char": 0.3102, + "acc_token": 0.7074, + "epoch": 0.02, + "loss_char": 0.658, + "loss_token": 1.5625, + "lr": "2.00e-07", + "norm": 5.8835, + "step": 982 + }, + { + "acc_char": 0.306, + "acc_token": 0.6826, + "epoch": 0.02, + "loss_char": 0.7437, + "loss_token": 1.5536, + "lr": "2.00e-07", + "norm": 5.7835, + "step": 983 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6837, + "epoch": 0.02, + "loss_char": 0.7336, + "loss_token": 1.6808, + "lr": "2.00e-07", + "norm": 6.0116, + "step": 984 + }, + { + "acc_char": 0.3081, + "acc_token": 0.7027, + "epoch": 0.02, + "loss_char": 0.6454, + "loss_token": 1.4823, + "lr": "2.00e-07", + "norm": 5.621, + "step": 985 + }, + { + "acc_char": 0.3, + "acc_token": 0.674, + "epoch": 0.02, + "loss_char": 0.7582, + "loss_token": 1.7042, + "lr": "2.00e-07", + "norm": 5.4723, + "step": 986 + }, + { + "acc_char": 0.3021, + "acc_token": 0.6848, + "epoch": 0.02, + "loss_char": 0.721, + "loss_token": 1.5896, + "lr": "2.00e-07", + "norm": 5.5576, + "step": 987 + }, + { + "acc_char": 0.2973, + "acc_token": 0.675, + "epoch": 0.02, + "loss_char": 0.7316, + "loss_token": 1.6535, + "lr": "2.00e-07", + "norm": 6.3758, + "step": 988 + }, + { + "acc_char": 0.2932, + "acc_token": 0.6709, + "epoch": 0.02, + "loss_char": 0.7234, + "loss_token": 1.6849, + "lr": "2.00e-07", + "norm": 6.0452, + "step": 989 + }, + { + "acc_char": 0.3049, + "acc_token": 0.6874, + "epoch": 0.02, + "loss_char": 0.7124, + "loss_token": 1.6068, + "lr": "2.00e-07", + "norm": 5.6446, + "step": 990 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6794, + "epoch": 0.02, + "loss_char": 0.7833, + "loss_token": 1.6207, + "lr": "2.00e-07", + "norm": 5.8083, + "step": 991 + }, + { + "acc_char": 0.3044, + "acc_token": 0.7026, + "epoch": 0.02, + "loss_char": 0.6677, + "loss_token": 1.6228, + "lr": "2.00e-07", + "norm": 5.8483, + "step": 992 + }, + { + "acc_char": 0.2968, + "acc_token": 0.6706, + "epoch": 0.02, + "loss_char": 0.7411, + "loss_token": 1.6051, + "lr": "2.00e-07", + "norm": 5.6637, + "step": 993 + }, + { + "acc_char": 0.2939, + "acc_token": 0.6371, + "epoch": 0.02, + "loss_char": 0.8373, + "loss_token": 1.6046, + "lr": "2.00e-07", + "norm": 5.2617, + "step": 994 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6698, + "epoch": 0.02, + "loss_char": 0.7605, + "loss_token": 1.7639, + "lr": "2.00e-07", + "norm": 6.172, + "step": 995 + }, + { + "acc_char": 0.3038, + "acc_token": 0.6939, + "epoch": 0.02, + "loss_char": 0.665, + "loss_token": 1.5734, + "lr": "2.00e-07", + "norm": 5.5388, + "step": 996 + }, + { + "acc_char": 0.2934, + "acc_token": 0.6633, + "epoch": 0.02, + "loss_char": 0.7859, + "loss_token": 1.7921, + "lr": "2.00e-07", + "norm": 5.8554, + "step": 997 + }, + { + "acc_char": 0.309, + "acc_token": 0.6991, + "epoch": 0.02, + "loss_char": 0.6785, + "loss_token": 1.5595, + "lr": "2.00e-07", + "norm": 5.6781, + "step": 998 + }, + { + "acc_char": 0.2965, + "acc_token": 0.6636, + "epoch": 0.02, + "loss_char": 0.796, + "loss_token": 1.8083, + "lr": "2.00e-07", + "norm": 6.3303, + "step": 999 + }, + { + "acc_char": 0.2988, + "acc_token": 0.6845, + "epoch": 0.02, + "loss_char": 0.6922, + "loss_token": 1.5965, + "lr": "2.00e-07", + "norm": 5.5313, + "step": 1000 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6778, + "epoch": 0.02, + "loss_char": 0.7077, + "loss_token": 1.6731, + "lr": "2.00e-07", + "norm": 5.7537, + "step": 1001 + }, + { + "acc_char": 0.3086, + "acc_token": 0.6934, + "epoch": 0.02, + "loss_char": 0.7039, + "loss_token": 1.6362, + "lr": "2.00e-07", + "norm": 5.7014, + "step": 1002 + }, + { + "acc_char": 0.3148, + "acc_token": 0.6991, + "epoch": 0.02, + "loss_char": 0.6969, + "loss_token": 1.5178, + "lr": "2.00e-07", + "norm": 5.6035, + "step": 1003 + }, + { + "acc_char": 0.3199, + "acc_token": 0.7144, + "epoch": 0.02, + "loss_char": 0.648, + "loss_token": 1.5337, + "lr": "2.00e-07", + "norm": 5.6349, + "step": 1004 + }, + { + "acc_char": 0.2994, + "acc_token": 0.6819, + "epoch": 0.02, + "loss_char": 0.7368, + "loss_token": 1.6663, + "lr": "2.00e-07", + "norm": 6.0038, + "step": 1005 + }, + { + "acc_char": 0.3102, + "acc_token": 0.7065, + "epoch": 0.02, + "loss_char": 0.6671, + "loss_token": 1.4836, + "lr": "2.00e-07", + "norm": 5.5694, + "step": 1006 + }, + { + "acc_char": 0.3098, + "acc_token": 0.7015, + "epoch": 0.02, + "loss_char": 0.6681, + "loss_token": 1.5047, + "lr": "2.00e-07", + "norm": 5.6658, + "step": 1007 + }, + { + "acc_char": 0.3133, + "acc_token": 0.696, + "epoch": 0.02, + "loss_char": 0.7178, + "loss_token": 1.5725, + "lr": "2.00e-07", + "norm": 5.6305, + "step": 1008 + }, + { + "acc_char": 0.3118, + "acc_token": 0.7075, + "epoch": 0.02, + "loss_char": 0.6562, + "loss_token": 1.6071, + "lr": "2.00e-07", + "norm": 5.8318, + "step": 1009 + }, + { + "acc_char": 0.299, + "acc_token": 0.6798, + "epoch": 0.02, + "loss_char": 0.7177, + "loss_token": 1.6163, + "lr": "2.00e-07", + "norm": 6.018, + "step": 1010 + }, + { + "acc_char": 0.3028, + "acc_token": 0.6844, + "epoch": 0.02, + "loss_char": 0.7275, + "loss_token": 1.6145, + "lr": "2.00e-07", + "norm": 5.5871, + "step": 1011 + }, + { + "acc_char": 0.307, + "acc_token": 0.6897, + "epoch": 0.02, + "loss_char": 0.692, + "loss_token": 1.6421, + "lr": "2.00e-07", + "norm": 5.5714, + "step": 1012 + }, + { + "acc_char": 0.3302, + "acc_token": 0.7278, + "epoch": 0.02, + "loss_char": 0.6332, + "loss_token": 1.4216, + "lr": "2.00e-07", + "norm": 5.5367, + "step": 1013 + }, + { + "acc_char": 0.301, + "acc_token": 0.6804, + "epoch": 0.02, + "loss_char": 0.715, + "loss_token": 1.5587, + "lr": "2.00e-07", + "norm": 5.5248, + "step": 1014 + }, + { + "acc_char": 0.312, + "acc_token": 0.7007, + "epoch": 0.02, + "loss_char": 0.6859, + "loss_token": 1.5663, + "lr": "2.00e-07", + "norm": 5.7662, + "step": 1015 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6801, + "epoch": 0.02, + "loss_char": 0.7289, + "loss_token": 1.7149, + "lr": "2.00e-07", + "norm": 6.2105, + "step": 1016 + }, + { + "acc_char": 0.2963, + "acc_token": 0.6727, + "epoch": 0.02, + "loss_char": 0.7366, + "loss_token": 1.6419, + "lr": "2.00e-07", + "norm": 5.5471, + "step": 1017 + }, + { + "acc_char": 0.2947, + "acc_token": 0.6715, + "epoch": 0.02, + "loss_char": 0.7429, + "loss_token": 1.6365, + "lr": "2.00e-07", + "norm": 5.8369, + "step": 1018 + }, + { + "acc_char": 0.2834, + "acc_token": 0.6051, + "epoch": 0.02, + "loss_char": 0.9581, + "loss_token": 1.7791, + "lr": "2.00e-07", + "norm": 5.8307, + "step": 1019 + }, + { + "acc_char": 0.3075, + "acc_token": 0.7017, + "epoch": 0.02, + "loss_char": 0.6708, + "loss_token": 1.6284, + "lr": "2.00e-07", + "norm": 5.6093, + "step": 1020 + }, + { + "acc_char": 0.2883, + "acc_token": 0.6585, + "epoch": 0.02, + "loss_char": 0.7865, + "loss_token": 1.6614, + "lr": "2.00e-07", + "norm": 5.5822, + "step": 1021 + }, + { + "acc_char": 0.3119, + "acc_token": 0.7002, + "epoch": 0.02, + "loss_char": 0.6822, + "loss_token": 1.522, + "lr": "2.00e-07", + "norm": 5.5264, + "step": 1022 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6818, + "epoch": 0.02, + "loss_char": 0.7099, + "loss_token": 1.6397, + "lr": "2.00e-07", + "norm": 6.0578, + "step": 1023 + }, + { + "acc_char": 0.2979, + "acc_token": 0.6682, + "epoch": 0.02, + "loss_char": 0.7661, + "loss_token": 1.5846, + "lr": "2.00e-07", + "norm": 5.6649, + "step": 1024 + }, + { + "acc_char": 0.3115, + "acc_token": 0.7107, + "epoch": 0.02, + "loss_char": 0.6384, + "loss_token": 1.4569, + "lr": "2.00e-07", + "norm": 5.8935, + "step": 1025 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6913, + "epoch": 0.02, + "loss_char": 0.7082, + "loss_token": 1.6987, + "lr": "2.00e-07", + "norm": 5.8197, + "step": 1026 + }, + { + "acc_char": 0.3048, + "acc_token": 0.701, + "epoch": 0.02, + "loss_char": 0.6587, + "loss_token": 1.5762, + "lr": "2.00e-07", + "norm": 5.9085, + "step": 1027 + }, + { + "acc_char": 0.2939, + "acc_token": 0.664, + "epoch": 0.02, + "loss_char": 0.7915, + "loss_token": 1.6195, + "lr": "2.00e-07", + "norm": 6.1612, + "step": 1028 + }, + { + "acc_char": 0.2942, + "acc_token": 0.6608, + "epoch": 0.02, + "loss_char": 0.795, + "loss_token": 1.7062, + "lr": "2.00e-07", + "norm": 5.9826, + "step": 1029 + }, + { + "acc_char": 0.302, + "acc_token": 0.6942, + "epoch": 0.02, + "loss_char": 0.679, + "loss_token": 1.5238, + "lr": "2.00e-07", + "norm": 5.5649, + "step": 1030 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6598, + "epoch": 0.02, + "loss_char": 0.7892, + "loss_token": 1.6886, + "lr": "2.00e-07", + "norm": 5.8533, + "step": 1031 + }, + { + "acc_char": 0.3043, + "acc_token": 0.6845, + "epoch": 0.02, + "loss_char": 0.7398, + "loss_token": 1.6375, + "lr": "2.00e-07", + "norm": 5.9787, + "step": 1032 + }, + { + "acc_char": 0.3008, + "acc_token": 0.6733, + "epoch": 0.02, + "loss_char": 0.7634, + "loss_token": 1.7155, + "lr": "2.00e-07", + "norm": 6.2085, + "step": 1033 + }, + { + "acc_char": 0.3073, + "acc_token": 0.7069, + "epoch": 0.02, + "loss_char": 0.646, + "loss_token": 1.5893, + "lr": "2.00e-07", + "norm": 5.4341, + "step": 1034 + }, + { + "acc_char": 0.2876, + "acc_token": 0.6591, + "epoch": 0.02, + "loss_char": 0.7724, + "loss_token": 1.714, + "lr": "2.00e-07", + "norm": 6.0691, + "step": 1035 + }, + { + "acc_char": 0.3212, + "acc_token": 0.694, + "epoch": 0.02, + "loss_char": 0.7451, + "loss_token": 1.6876, + "lr": "2.00e-07", + "norm": 5.8539, + "step": 1036 + }, + { + "acc_char": 0.3072, + "acc_token": 0.6944, + "epoch": 0.02, + "loss_char": 0.694, + "loss_token": 1.644, + "lr": "2.00e-07", + "norm": 5.9415, + "step": 1037 + }, + { + "acc_char": 0.3083, + "acc_token": 0.704, + "epoch": 0.02, + "loss_char": 0.663, + "loss_token": 1.5913, + "lr": "2.00e-07", + "norm": 5.752, + "step": 1038 + }, + { + "acc_char": 0.3034, + "acc_token": 0.6902, + "epoch": 0.02, + "loss_char": 0.6879, + "loss_token": 1.5627, + "lr": "2.00e-07", + "norm": 5.4879, + "step": 1039 + }, + { + "acc_char": 0.3045, + "acc_token": 0.693, + "epoch": 0.02, + "loss_char": 0.7045, + "loss_token": 1.6542, + "lr": "2.00e-07", + "norm": 5.9541, + "step": 1040 + }, + { + "acc_char": 0.2985, + "acc_token": 0.6826, + "epoch": 0.02, + "loss_char": 0.7156, + "loss_token": 1.6487, + "lr": "2.00e-07", + "norm": 5.9876, + "step": 1041 + }, + { + "acc_char": 0.2876, + "acc_token": 0.6663, + "epoch": 0.02, + "loss_char": 0.7454, + "loss_token": 1.7047, + "lr": "2.00e-07", + "norm": 5.9158, + "step": 1042 + }, + { + "acc_char": 0.312, + "acc_token": 0.6979, + "epoch": 0.02, + "loss_char": 0.7015, + "loss_token": 1.6362, + "lr": "2.00e-07", + "norm": 5.7544, + "step": 1043 + }, + { + "acc_char": 0.317, + "acc_token": 0.7015, + "epoch": 0.02, + "loss_char": 0.6757, + "loss_token": 1.5832, + "lr": "2.00e-07", + "norm": 5.8245, + "step": 1044 + }, + { + "acc_char": 0.2898, + "acc_token": 0.6603, + "epoch": 0.02, + "loss_char": 0.7839, + "loss_token": 1.6956, + "lr": "2.00e-07", + "norm": 5.586, + "step": 1045 + }, + { + "acc_char": 0.3107, + "acc_token": 0.7142, + "epoch": 0.02, + "loss_char": 0.6085, + "loss_token": 1.5081, + "lr": "2.00e-07", + "norm": 5.6408, + "step": 1046 + }, + { + "acc_char": 0.307, + "acc_token": 0.6913, + "epoch": 0.02, + "loss_char": 0.719, + "loss_token": 1.6445, + "lr": "2.00e-07", + "norm": 6.0086, + "step": 1047 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6932, + "epoch": 0.02, + "loss_char": 0.6583, + "loss_token": 1.59, + "lr": "2.00e-07", + "norm": 5.6165, + "step": 1048 + }, + { + "acc_char": 0.2898, + "acc_token": 0.6699, + "epoch": 0.02, + "loss_char": 0.7697, + "loss_token": 1.6934, + "lr": "2.00e-07", + "norm": 5.8678, + "step": 1049 + }, + { + "acc_char": 0.2896, + "acc_token": 0.6657, + "epoch": 0.02, + "loss_char": 0.726, + "loss_token": 1.6075, + "lr": "2.00e-07", + "norm": 5.7553, + "step": 1050 + }, + { + "acc_char": 0.3099, + "acc_token": 0.6864, + "epoch": 0.02, + "loss_char": 0.7305, + "loss_token": 1.5594, + "lr": "2.00e-07", + "norm": 5.5492, + "step": 1051 + }, + { + "acc_char": 0.308, + "acc_token": 0.698, + "epoch": 0.02, + "loss_char": 0.6745, + "loss_token": 1.6243, + "lr": "2.00e-07", + "norm": 5.6144, + "step": 1052 + }, + { + "acc_char": 0.3049, + "acc_token": 0.6837, + "epoch": 0.02, + "loss_char": 0.7032, + "loss_token": 1.5993, + "lr": "2.00e-07", + "norm": 5.7339, + "step": 1053 + }, + { + "acc_char": 0.2919, + "acc_token": 0.6712, + "epoch": 0.02, + "loss_char": 0.7139, + "loss_token": 1.6253, + "lr": "2.00e-07", + "norm": 5.5032, + "step": 1054 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6947, + "epoch": 0.02, + "loss_char": 0.6816, + "loss_token": 1.5841, + "lr": "2.00e-07", + "norm": 5.6046, + "step": 1055 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6752, + "epoch": 0.02, + "loss_char": 0.7526, + "loss_token": 1.5791, + "lr": "2.00e-07", + "norm": 5.6113, + "step": 1056 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6852, + "epoch": 0.02, + "loss_char": 0.6962, + "loss_token": 1.5514, + "lr": "2.00e-07", + "norm": 5.402, + "step": 1057 + }, + { + "acc_char": 0.2951, + "acc_token": 0.6855, + "epoch": 0.02, + "loss_char": 0.6932, + "loss_token": 1.6079, + "lr": "2.00e-07", + "norm": 5.5452, + "step": 1058 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6767, + "epoch": 0.02, + "loss_char": 0.7406, + "loss_token": 1.654, + "lr": "2.00e-07", + "norm": 5.6495, + "step": 1059 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6819, + "epoch": 0.02, + "loss_char": 0.7202, + "loss_token": 1.6393, + "lr": "2.00e-07", + "norm": 5.7668, + "step": 1060 + }, + { + "acc_char": 0.3053, + "acc_token": 0.6853, + "epoch": 0.02, + "loss_char": 0.7316, + "loss_token": 1.6124, + "lr": "2.00e-07", + "norm": 5.7434, + "step": 1061 + }, + { + "acc_char": 0.3086, + "acc_token": 0.6933, + "epoch": 0.02, + "loss_char": 0.7046, + "loss_token": 1.5938, + "lr": "2.00e-07", + "norm": 5.7935, + "step": 1062 + }, + { + "acc_char": 0.297, + "acc_token": 0.6864, + "epoch": 0.02, + "loss_char": 0.6721, + "loss_token": 1.5034, + "lr": "2.00e-07", + "norm": 5.5376, + "step": 1063 + }, + { + "acc_char": 0.3153, + "acc_token": 0.6985, + "epoch": 0.02, + "loss_char": 0.7039, + "loss_token": 1.629, + "lr": "2.00e-07", + "norm": 5.7942, + "step": 1064 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6922, + "epoch": 0.02, + "loss_char": 0.6849, + "loss_token": 1.5634, + "lr": "2.00e-07", + "norm": 5.574, + "step": 1065 + }, + { + "acc_char": 0.3042, + "acc_token": 0.6885, + "epoch": 0.02, + "loss_char": 0.7123, + "loss_token": 1.6346, + "lr": "2.00e-07", + "norm": 5.748, + "step": 1066 + }, + { + "acc_char": 0.3022, + "acc_token": 0.6913, + "epoch": 0.02, + "loss_char": 0.6892, + "loss_token": 1.6414, + "lr": "2.00e-07", + "norm": 5.6367, + "step": 1067 + }, + { + "acc_char": 0.3201, + "acc_token": 0.7164, + "epoch": 0.02, + "loss_char": 0.6417, + "loss_token": 1.4198, + "lr": "2.00e-07", + "norm": 5.763, + "step": 1068 + }, + { + "acc_char": 0.3084, + "acc_token": 0.7033, + "epoch": 0.02, + "loss_char": 0.6387, + "loss_token": 1.4254, + "lr": "2.00e-07", + "norm": 5.5338, + "step": 1069 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6755, + "epoch": 0.02, + "loss_char": 0.7654, + "loss_token": 1.66, + "lr": "2.00e-07", + "norm": 5.725, + "step": 1070 + }, + { + "acc_char": 0.3093, + "acc_token": 0.6938, + "epoch": 0.02, + "loss_char": 0.6978, + "loss_token": 1.4869, + "lr": "2.00e-07", + "norm": 5.6716, + "step": 1071 + }, + { + "acc_char": 0.3168, + "acc_token": 0.704, + "epoch": 0.02, + "loss_char": 0.6935, + "loss_token": 1.5235, + "lr": "2.00e-07", + "norm": 5.7914, + "step": 1072 + }, + { + "acc_char": 0.2949, + "acc_token": 0.6807, + "epoch": 0.02, + "loss_char": 0.7211, + "loss_token": 1.4897, + "lr": "2.00e-07", + "norm": 5.769, + "step": 1073 + }, + { + "acc_char": 0.2976, + "acc_token": 0.67, + "epoch": 0.02, + "loss_char": 0.7538, + "loss_token": 1.5103, + "lr": "2.00e-07", + "norm": 5.6134, + "step": 1074 + }, + { + "acc_char": 0.3044, + "acc_token": 0.7082, + "epoch": 0.02, + "loss_char": 0.6335, + "loss_token": 1.5032, + "lr": "2.00e-07", + "norm": 5.7971, + "step": 1075 + }, + { + "acc_char": 0.3111, + "acc_token": 0.6939, + "epoch": 0.02, + "loss_char": 0.6864, + "loss_token": 1.5623, + "lr": "2.00e-07", + "norm": 6.0235, + "step": 1076 + }, + { + "acc_char": 0.298, + "acc_token": 0.677, + "epoch": 0.02, + "loss_char": 0.7431, + "loss_token": 1.6234, + "lr": "2.00e-07", + "norm": 5.7891, + "step": 1077 + }, + { + "acc_char": 0.2977, + "acc_token": 0.6496, + "epoch": 0.02, + "loss_char": 0.8182, + "loss_token": 1.7089, + "lr": "2.00e-07", + "norm": 5.4175, + "step": 1078 + }, + { + "acc_char": 0.3125, + "acc_token": 0.7023, + "epoch": 0.02, + "loss_char": 0.6682, + "loss_token": 1.5609, + "lr": "2.00e-07", + "norm": 5.61, + "step": 1079 + }, + { + "acc_char": 0.289, + "acc_token": 0.629, + "epoch": 0.02, + "loss_char": 0.8491, + "loss_token": 1.7577, + "lr": "2.00e-07", + "norm": 5.6545, + "step": 1080 + }, + { + "acc_char": 0.2979, + "acc_token": 0.6751, + "epoch": 0.02, + "loss_char": 0.742, + "loss_token": 1.6155, + "lr": "2.00e-07", + "norm": 6.1317, + "step": 1081 + }, + { + "acc_char": 0.2807, + "acc_token": 0.6569, + "epoch": 0.02, + "loss_char": 0.7763, + "loss_token": 1.6378, + "lr": "2.00e-07", + "norm": 5.7644, + "step": 1082 + }, + { + "acc_char": 0.3042, + "acc_token": 0.6817, + "epoch": 0.02, + "loss_char": 0.725, + "loss_token": 1.6469, + "lr": "2.00e-07", + "norm": 6.1361, + "step": 1083 + }, + { + "acc_char": 0.3027, + "acc_token": 0.6684, + "epoch": 0.02, + "loss_char": 0.802, + "loss_token": 1.6769, + "lr": "2.00e-07", + "norm": 5.7604, + "step": 1084 + }, + { + "acc_char": 0.3024, + "acc_token": 0.7005, + "epoch": 0.02, + "loss_char": 0.6753, + "loss_token": 1.6225, + "lr": "2.00e-07", + "norm": 6.0857, + "step": 1085 + }, + { + "acc_char": 0.3024, + "acc_token": 0.6782, + "epoch": 0.02, + "loss_char": 0.7807, + "loss_token": 1.6282, + "lr": "2.00e-07", + "norm": 5.5427, + "step": 1086 + }, + { + "acc_char": 0.3037, + "acc_token": 0.6775, + "epoch": 0.02, + "loss_char": 0.7505, + "loss_token": 1.6065, + "lr": "2.00e-07", + "norm": 5.8749, + "step": 1087 + }, + { + "acc_char": 0.305, + "acc_token": 0.6957, + "epoch": 0.02, + "loss_char": 0.6722, + "loss_token": 1.5519, + "lr": "2.00e-07", + "norm": 6.1559, + "step": 1088 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6814, + "epoch": 0.02, + "loss_char": 0.7442, + "loss_token": 1.6725, + "lr": "2.00e-07", + "norm": 5.7716, + "step": 1089 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6795, + "epoch": 0.02, + "loss_char": 0.735, + "loss_token": 1.6029, + "lr": "2.00e-07", + "norm": 5.6591, + "step": 1090 + }, + { + "acc_char": 0.3149, + "acc_token": 0.7071, + "epoch": 0.02, + "loss_char": 0.6583, + "loss_token": 1.5001, + "lr": "2.00e-07", + "norm": 6.1907, + "step": 1091 + }, + { + "acc_char": 0.3112, + "acc_token": 0.6987, + "epoch": 0.02, + "loss_char": 0.6887, + "loss_token": 1.649, + "lr": "2.00e-07", + "norm": 5.9268, + "step": 1092 + }, + { + "acc_char": 0.3063, + "acc_token": 0.6854, + "epoch": 0.02, + "loss_char": 0.7085, + "loss_token": 1.6117, + "lr": "2.00e-07", + "norm": 5.5384, + "step": 1093 + }, + { + "acc_char": 0.3178, + "acc_token": 0.709, + "epoch": 0.02, + "loss_char": 0.6628, + "loss_token": 1.5315, + "lr": "2.00e-07", + "norm": 5.8061, + "step": 1094 + }, + { + "acc_char": 0.2962, + "acc_token": 0.6681, + "epoch": 0.02, + "loss_char": 0.793, + "loss_token": 1.7314, + "lr": "2.00e-07", + "norm": 6.2168, + "step": 1095 + }, + { + "acc_char": 0.2913, + "acc_token": 0.6629, + "epoch": 0.02, + "loss_char": 0.799, + "loss_token": 1.6578, + "lr": "2.00e-07", + "norm": 5.7005, + "step": 1096 + }, + { + "acc_char": 0.3066, + "acc_token": 0.6833, + "epoch": 0.02, + "loss_char": 0.7418, + "loss_token": 1.6199, + "lr": "2.00e-07", + "norm": 5.6565, + "step": 1097 + }, + { + "acc_char": 0.3096, + "acc_token": 0.6978, + "epoch": 0.02, + "loss_char": 0.6901, + "loss_token": 1.5497, + "lr": "2.00e-07", + "norm": 5.6842, + "step": 1098 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6748, + "epoch": 0.02, + "loss_char": 0.7516, + "loss_token": 1.6768, + "lr": "2.00e-07", + "norm": 5.9045, + "step": 1099 + }, + { + "acc_char": 0.308, + "acc_token": 0.6957, + "epoch": 0.02, + "loss_char": 0.7002, + "loss_token": 1.5442, + "lr": "2.00e-07", + "norm": 5.5604, + "step": 1100 + }, + { + "acc_char": 0.3073, + "acc_token": 0.6942, + "epoch": 0.02, + "loss_char": 0.6821, + "loss_token": 1.6096, + "lr": "2.00e-07", + "norm": 5.6615, + "step": 1101 + }, + { + "acc_char": 0.2967, + "acc_token": 0.676, + "epoch": 0.02, + "loss_char": 0.7305, + "loss_token": 1.6356, + "lr": "2.00e-07", + "norm": 5.9914, + "step": 1102 + }, + { + "acc_char": 0.3041, + "acc_token": 0.6879, + "epoch": 0.02, + "loss_char": 0.7077, + "loss_token": 1.6373, + "lr": "2.00e-07", + "norm": 5.6571, + "step": 1103 + }, + { + "acc_char": 0.3059, + "acc_token": 0.692, + "epoch": 0.02, + "loss_char": 0.7083, + "loss_token": 1.5886, + "lr": "2.00e-07", + "norm": 5.7888, + "step": 1104 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6927, + "epoch": 0.02, + "loss_char": 0.6797, + "loss_token": 1.5879, + "lr": "2.00e-07", + "norm": 5.5456, + "step": 1105 + }, + { + "acc_char": 0.3108, + "acc_token": 0.7049, + "epoch": 0.02, + "loss_char": 0.6782, + "loss_token": 1.6428, + "lr": "2.00e-07", + "norm": 5.7501, + "step": 1106 + }, + { + "acc_char": 0.3124, + "acc_token": 0.7192, + "epoch": 0.02, + "loss_char": 0.6138, + "loss_token": 1.4979, + "lr": "2.00e-07", + "norm": 5.5096, + "step": 1107 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6847, + "epoch": 0.02, + "loss_char": 0.7125, + "loss_token": 1.6176, + "lr": "2.00e-07", + "norm": 5.9919, + "step": 1108 + }, + { + "acc_char": 0.2978, + "acc_token": 0.6398, + "epoch": 0.02, + "loss_char": 0.8575, + "loss_token": 1.8423, + "lr": "2.00e-07", + "norm": 5.9944, + "step": 1109 + }, + { + "acc_char": 0.336, + "acc_token": 0.7513, + "epoch": 0.02, + "loss_char": 0.5394, + "loss_token": 1.4633, + "lr": "2.00e-07", + "norm": 5.5493, + "step": 1110 + }, + { + "acc_char": 0.2986, + "acc_token": 0.6811, + "epoch": 0.02, + "loss_char": 0.7307, + "loss_token": 1.6021, + "lr": "2.00e-07", + "norm": 5.977, + "step": 1111 + }, + { + "acc_char": 0.3055, + "acc_token": 0.6839, + "epoch": 0.02, + "loss_char": 0.7099, + "loss_token": 1.6421, + "lr": "2.00e-07", + "norm": 6.1366, + "step": 1112 + }, + { + "acc_char": 0.2972, + "acc_token": 0.6873, + "epoch": 0.02, + "loss_char": 0.6962, + "loss_token": 1.526, + "lr": "2.00e-07", + "norm": 5.6157, + "step": 1113 + }, + { + "acc_char": 0.3069, + "acc_token": 0.658, + "epoch": 0.02, + "loss_char": 0.8129, + "loss_token": 1.6929, + "lr": "2.00e-07", + "norm": 5.2494, + "step": 1114 + }, + { + "acc_char": 0.3093, + "acc_token": 0.7066, + "epoch": 0.02, + "loss_char": 0.6525, + "loss_token": 1.5155, + "lr": "2.00e-07", + "norm": 5.5897, + "step": 1115 + }, + { + "acc_char": 0.3035, + "acc_token": 0.6805, + "epoch": 0.02, + "loss_char": 0.778, + "loss_token": 1.6953, + "lr": "2.00e-07", + "norm": 5.9045, + "step": 1116 + }, + { + "acc_char": 0.3072, + "acc_token": 0.6812, + "epoch": 0.02, + "loss_char": 0.7494, + "loss_token": 1.6054, + "lr": "2.00e-07", + "norm": 5.7761, + "step": 1117 + }, + { + "acc_char": 0.3175, + "acc_token": 0.7154, + "epoch": 0.02, + "loss_char": 0.6509, + "loss_token": 1.6047, + "lr": "2.00e-07", + "norm": 5.8353, + "step": 1118 + }, + { + "acc_char": 0.2921, + "acc_token": 0.6718, + "epoch": 0.02, + "loss_char": 0.7321, + "loss_token": 1.6241, + "lr": "2.00e-07", + "norm": 5.8048, + "step": 1119 + }, + { + "acc_char": 0.3005, + "acc_token": 0.6899, + "epoch": 0.02, + "loss_char": 0.6899, + "loss_token": 1.6578, + "lr": "2.00e-07", + "norm": 5.7674, + "step": 1120 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6599, + "epoch": 0.02, + "loss_char": 0.7958, + "loss_token": 1.7595, + "lr": "2.00e-07", + "norm": 5.7997, + "step": 1121 + }, + { + "acc_char": 0.2968, + "acc_token": 0.6699, + "epoch": 0.02, + "loss_char": 0.7646, + "loss_token": 1.6413, + "lr": "2.00e-07", + "norm": 6.2408, + "step": 1122 + }, + { + "acc_char": 0.3, + "acc_token": 0.674, + "epoch": 0.02, + "loss_char": 0.7489, + "loss_token": 1.6281, + "lr": "2.00e-07", + "norm": 6.1318, + "step": 1123 + }, + { + "acc_char": 0.305, + "acc_token": 0.6771, + "epoch": 0.02, + "loss_char": 0.7268, + "loss_token": 1.6143, + "lr": "2.00e-07", + "norm": 5.8492, + "step": 1124 + }, + { + "acc_char": 0.3118, + "acc_token": 0.7104, + "epoch": 0.02, + "loss_char": 0.6392, + "loss_token": 1.4847, + "lr": "2.00e-07", + "norm": 5.6128, + "step": 1125 + }, + { + "acc_char": 0.3019, + "acc_token": 0.6886, + "epoch": 0.02, + "loss_char": 0.695, + "loss_token": 1.6002, + "lr": "2.00e-07", + "norm": 5.6202, + "step": 1126 + }, + { + "acc_char": 0.2998, + "acc_token": 0.6811, + "epoch": 0.02, + "loss_char": 0.7349, + "loss_token": 1.6541, + "lr": "2.00e-07", + "norm": 5.9175, + "step": 1127 + }, + { + "acc_char": 0.3002, + "acc_token": 0.6721, + "epoch": 0.02, + "loss_char": 0.8037, + "loss_token": 1.6463, + "lr": "2.00e-07", + "norm": 5.9228, + "step": 1128 + }, + { + "acc_char": 0.2939, + "acc_token": 0.6658, + "epoch": 0.02, + "loss_char": 0.7702, + "loss_token": 1.6422, + "lr": "2.00e-07", + "norm": 5.8841, + "step": 1129 + }, + { + "acc_char": 0.3095, + "acc_token": 0.7018, + "epoch": 0.02, + "loss_char": 0.6496, + "loss_token": 1.5094, + "lr": "2.00e-07", + "norm": 5.4645, + "step": 1130 + }, + { + "acc_char": 0.2889, + "acc_token": 0.6654, + "epoch": 0.02, + "loss_char": 0.7496, + "loss_token": 1.6055, + "lr": "2.00e-07", + "norm": 5.7124, + "step": 1131 + }, + { + "acc_char": 0.303, + "acc_token": 0.6727, + "epoch": 0.02, + "loss_char": 0.748, + "loss_token": 1.6193, + "lr": "2.00e-07", + "norm": 5.7687, + "step": 1132 + }, + { + "acc_char": 0.2895, + "acc_token": 0.6682, + "epoch": 0.02, + "loss_char": 0.7351, + "loss_token": 1.7073, + "lr": "2.00e-07", + "norm": 5.9363, + "step": 1133 + }, + { + "acc_char": 0.3023, + "acc_token": 0.6841, + "epoch": 0.02, + "loss_char": 0.7325, + "loss_token": 1.68, + "lr": "2.00e-07", + "norm": 5.6545, + "step": 1134 + }, + { + "acc_char": 0.3073, + "acc_token": 0.6845, + "epoch": 0.02, + "loss_char": 0.7218, + "loss_token": 1.6811, + "lr": "2.00e-07", + "norm": 5.7013, + "step": 1135 + }, + { + "acc_char": 0.3153, + "acc_token": 0.7098, + "epoch": 0.02, + "loss_char": 0.6496, + "loss_token": 1.6311, + "lr": "2.00e-07", + "norm": 5.502, + "step": 1136 + }, + { + "acc_char": 0.3176, + "acc_token": 0.7143, + "epoch": 0.02, + "loss_char": 0.6434, + "loss_token": 1.4789, + "lr": "2.00e-07", + "norm": 5.409, + "step": 1137 + }, + { + "acc_char": 0.2948, + "acc_token": 0.6224, + "epoch": 0.02, + "loss_char": 0.9341, + "loss_token": 1.8052, + "lr": "2.00e-07", + "norm": 5.7219, + "step": 1138 + }, + { + "acc_char": 0.314, + "acc_token": 0.7197, + "epoch": 0.02, + "loss_char": 0.6184, + "loss_token": 1.4714, + "lr": "2.00e-07", + "norm": 5.2736, + "step": 1139 + }, + { + "acc_char": 0.3037, + "acc_token": 0.6837, + "epoch": 0.02, + "loss_char": 0.7257, + "loss_token": 1.6964, + "lr": "2.00e-07", + "norm": 5.7293, + "step": 1140 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6801, + "epoch": 0.02, + "loss_char": 0.7177, + "loss_token": 1.6569, + "lr": "2.00e-07", + "norm": 5.8446, + "step": 1141 + }, + { + "acc_char": 0.3144, + "acc_token": 0.7021, + "epoch": 0.02, + "loss_char": 0.6756, + "loss_token": 1.5498, + "lr": "2.00e-07", + "norm": 5.4456, + "step": 1142 + }, + { + "acc_char": 0.3091, + "acc_token": 0.7007, + "epoch": 0.02, + "loss_char": 0.6462, + "loss_token": 1.5228, + "lr": "2.00e-07", + "norm": 5.4793, + "step": 1143 + }, + { + "acc_char": 0.3141, + "acc_token": 0.7055, + "epoch": 0.02, + "loss_char": 0.6789, + "loss_token": 1.5552, + "lr": "2.00e-07", + "norm": 5.6754, + "step": 1144 + }, + { + "acc_char": 0.3149, + "acc_token": 0.7041, + "epoch": 0.02, + "loss_char": 0.677, + "loss_token": 1.6774, + "lr": "2.00e-07", + "norm": 5.6087, + "step": 1145 + }, + { + "acc_char": 0.2854, + "acc_token": 0.6525, + "epoch": 0.02, + "loss_char": 0.8141, + "loss_token": 1.7267, + "lr": "2.00e-07", + "norm": 6.0019, + "step": 1146 + }, + { + "acc_char": 0.3099, + "acc_token": 0.7048, + "epoch": 0.02, + "loss_char": 0.6781, + "loss_token": 1.5686, + "lr": "2.00e-07", + "norm": 5.6791, + "step": 1147 + }, + { + "acc_char": 0.296, + "acc_token": 0.675, + "epoch": 0.02, + "loss_char": 0.7463, + "loss_token": 1.679, + "lr": "2.00e-07", + "norm": 5.5865, + "step": 1148 + }, + { + "acc_char": 0.3138, + "acc_token": 0.7211, + "epoch": 0.02, + "loss_char": 0.6145, + "loss_token": 1.5026, + "lr": "2.00e-07", + "norm": 5.7165, + "step": 1149 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6962, + "epoch": 0.02, + "loss_char": 0.657, + "loss_token": 1.5813, + "lr": "2.00e-07", + "norm": 5.806, + "step": 1150 + }, + { + "acc_char": 0.3103, + "acc_token": 0.7005, + "epoch": 0.02, + "loss_char": 0.6963, + "loss_token": 1.6262, + "lr": "2.00e-07", + "norm": 5.6984, + "step": 1151 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6378, + "epoch": 0.02, + "loss_char": 0.8329, + "loss_token": 1.7329, + "lr": "2.00e-07", + "norm": 5.4393, + "step": 1152 + }, + { + "acc_char": 0.2946, + "acc_token": 0.6716, + "epoch": 0.02, + "loss_char": 0.7424, + "loss_token": 1.5892, + "lr": "2.00e-07", + "norm": 6.0617, + "step": 1153 + }, + { + "acc_char": 0.3046, + "acc_token": 0.6855, + "epoch": 0.02, + "loss_char": 0.7098, + "loss_token": 1.6979, + "lr": "2.00e-07", + "norm": 5.6377, + "step": 1154 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6955, + "epoch": 0.02, + "loss_char": 0.6606, + "loss_token": 1.5007, + "lr": "2.00e-07", + "norm": 5.4087, + "step": 1155 + }, + { + "acc_char": 0.2943, + "acc_token": 0.6602, + "epoch": 0.02, + "loss_char": 0.7823, + "loss_token": 1.6413, + "lr": "2.00e-07", + "norm": 5.8073, + "step": 1156 + }, + { + "acc_char": 0.2912, + "acc_token": 0.698, + "epoch": 0.02, + "loss_char": 0.6182, + "loss_token": 1.5402, + "lr": "2.00e-07", + "norm": 5.8262, + "step": 1157 + }, + { + "acc_char": 0.3198, + "acc_token": 0.7236, + "epoch": 0.02, + "loss_char": 0.6016, + "loss_token": 1.4951, + "lr": "2.00e-07", + "norm": 5.5455, + "step": 1158 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6685, + "epoch": 0.02, + "loss_char": 0.7937, + "loss_token": 1.6673, + "lr": "2.00e-07", + "norm": 5.9008, + "step": 1159 + }, + { + "acc_char": 0.3045, + "acc_token": 0.6974, + "epoch": 0.02, + "loss_char": 0.6667, + "loss_token": 1.5969, + "lr": "2.00e-07", + "norm": 5.8918, + "step": 1160 + }, + { + "acc_char": 0.3024, + "acc_token": 0.685, + "epoch": 0.02, + "loss_char": 0.7058, + "loss_token": 1.5962, + "lr": "2.00e-07", + "norm": 5.6316, + "step": 1161 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6877, + "epoch": 0.02, + "loss_char": 0.7297, + "loss_token": 1.6399, + "lr": "2.00e-07", + "norm": 5.7485, + "step": 1162 + }, + { + "acc_char": 0.2939, + "acc_token": 0.6676, + "epoch": 0.02, + "loss_char": 0.7448, + "loss_token": 1.6946, + "lr": "2.00e-07", + "norm": 5.8275, + "step": 1163 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6583, + "epoch": 0.02, + "loss_char": 0.7806, + "loss_token": 1.698, + "lr": "2.00e-07", + "norm": 5.6786, + "step": 1164 + }, + { + "acc_char": 0.2888, + "acc_token": 0.6536, + "epoch": 0.02, + "loss_char": 0.8236, + "loss_token": 1.7791, + "lr": "2.00e-07", + "norm": 6.2519, + "step": 1165 + }, + { + "acc_char": 0.3082, + "acc_token": 0.6946, + "epoch": 0.02, + "loss_char": 0.7213, + "loss_token": 1.6737, + "lr": "2.00e-07", + "norm": 5.6594, + "step": 1166 + }, + { + "acc_char": 0.2911, + "acc_token": 0.6662, + "epoch": 0.02, + "loss_char": 0.7801, + "loss_token": 1.7293, + "lr": "2.00e-07", + "norm": 5.8279, + "step": 1167 + }, + { + "acc_char": 0.2991, + "acc_token": 0.6862, + "epoch": 0.02, + "loss_char": 0.6893, + "loss_token": 1.6088, + "lr": "2.00e-07", + "norm": 5.7553, + "step": 1168 + }, + { + "acc_char": 0.3086, + "acc_token": 0.703, + "epoch": 0.02, + "loss_char": 0.6888, + "loss_token": 1.5782, + "lr": "2.00e-07", + "norm": 5.7771, + "step": 1169 + }, + { + "acc_char": 0.3103, + "acc_token": 0.713, + "epoch": 0.02, + "loss_char": 0.6274, + "loss_token": 1.5235, + "lr": "2.00e-07", + "norm": 5.775, + "step": 1170 + }, + { + "acc_char": 0.2932, + "acc_token": 0.6684, + "epoch": 0.02, + "loss_char": 0.7567, + "loss_token": 1.5922, + "lr": "2.00e-07", + "norm": 5.9991, + "step": 1171 + }, + { + "acc_char": 0.3162, + "acc_token": 0.7083, + "epoch": 0.02, + "loss_char": 0.6703, + "loss_token": 1.6602, + "lr": "2.00e-07", + "norm": 5.4752, + "step": 1172 + }, + { + "acc_char": 0.2892, + "acc_token": 0.6303, + "epoch": 0.02, + "loss_char": 0.8684, + "loss_token": 1.7126, + "lr": "2.00e-07", + "norm": 5.755, + "step": 1173 + }, + { + "acc_char": 0.2921, + "acc_token": 0.666, + "epoch": 0.02, + "loss_char": 0.7583, + "loss_token": 1.6371, + "lr": "2.00e-07", + "norm": 5.6528, + "step": 1174 + }, + { + "acc_char": 0.2959, + "acc_token": 0.686, + "epoch": 0.02, + "loss_char": 0.6875, + "loss_token": 1.5647, + "lr": "2.00e-07", + "norm": 5.4077, + "step": 1175 + }, + { + "acc_char": 0.3114, + "acc_token": 0.6969, + "epoch": 0.02, + "loss_char": 0.6888, + "loss_token": 1.5857, + "lr": "2.00e-07", + "norm": 5.8141, + "step": 1176 + }, + { + "acc_char": 0.3214, + "acc_token": 0.7065, + "epoch": 0.02, + "loss_char": 0.6635, + "loss_token": 1.4552, + "lr": "2.00e-07", + "norm": 5.7959, + "step": 1177 + }, + { + "acc_char": 0.3034, + "acc_token": 0.6808, + "epoch": 0.02, + "loss_char": 0.7381, + "loss_token": 1.7253, + "lr": "2.00e-07", + "norm": 5.9402, + "step": 1178 + }, + { + "acc_char": 0.3135, + "acc_token": 0.7093, + "epoch": 0.02, + "loss_char": 0.6598, + "loss_token": 1.5537, + "lr": "2.00e-07", + "norm": 5.4749, + "step": 1179 + }, + { + "acc_char": 0.2853, + "acc_token": 0.6284, + "epoch": 0.02, + "loss_char": 0.8759, + "loss_token": 1.733, + "lr": "2.00e-07", + "norm": 5.8521, + "step": 1180 + }, + { + "acc_char": 0.274, + "acc_token": 0.6284, + "epoch": 0.02, + "loss_char": 0.8762, + "loss_token": 1.7769, + "lr": "2.00e-07", + "norm": 5.8663, + "step": 1181 + }, + { + "acc_char": 0.3098, + "acc_token": 0.6998, + "epoch": 0.02, + "loss_char": 0.6929, + "loss_token": 1.5764, + "lr": "2.00e-07", + "norm": 5.6745, + "step": 1182 + }, + { + "acc_char": 0.3059, + "acc_token": 0.6963, + "epoch": 0.02, + "loss_char": 0.6876, + "loss_token": 1.6232, + "lr": "2.00e-07", + "norm": 5.8334, + "step": 1183 + }, + { + "acc_char": 0.3013, + "acc_token": 0.6847, + "epoch": 0.02, + "loss_char": 0.6982, + "loss_token": 1.5633, + "lr": "2.00e-07", + "norm": 5.6752, + "step": 1184 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6781, + "epoch": 0.02, + "loss_char": 0.7393, + "loss_token": 1.6662, + "lr": "2.00e-07", + "norm": 5.9113, + "step": 1185 + }, + { + "acc_char": 0.2929, + "acc_token": 0.6713, + "epoch": 0.02, + "loss_char": 0.7428, + "loss_token": 1.6182, + "lr": "2.00e-07", + "norm": 6.1931, + "step": 1186 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6851, + "epoch": 0.02, + "loss_char": 0.6948, + "loss_token": 1.6335, + "lr": "2.00e-07", + "norm": 5.8183, + "step": 1187 + }, + { + "acc_char": 0.3169, + "acc_token": 0.708, + "epoch": 0.02, + "loss_char": 0.6941, + "loss_token": 1.5985, + "lr": "2.00e-07", + "norm": 6.1227, + "step": 1188 + }, + { + "acc_char": 0.2947, + "acc_token": 0.6685, + "epoch": 0.02, + "loss_char": 0.7599, + "loss_token": 1.6387, + "lr": "2.00e-07", + "norm": 5.9399, + "step": 1189 + }, + { + "acc_char": 0.2882, + "acc_token": 0.6734, + "epoch": 0.02, + "loss_char": 0.7478, + "loss_token": 1.6861, + "lr": "2.00e-07", + "norm": 5.8545, + "step": 1190 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6772, + "epoch": 0.02, + "loss_char": 0.7301, + "loss_token": 1.6765, + "lr": "2.00e-07", + "norm": 6.0008, + "step": 1191 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6815, + "epoch": 0.02, + "loss_char": 0.7253, + "loss_token": 1.5029, + "lr": "2.00e-07", + "norm": 5.5761, + "step": 1192 + }, + { + "acc_char": 0.2976, + "acc_token": 0.6713, + "epoch": 0.02, + "loss_char": 0.7522, + "loss_token": 1.6588, + "lr": "2.00e-07", + "norm": 5.679, + "step": 1193 + }, + { + "acc_char": 0.2938, + "acc_token": 0.668, + "epoch": 0.02, + "loss_char": 0.7532, + "loss_token": 1.6303, + "lr": "2.00e-07", + "norm": 5.6541, + "step": 1194 + }, + { + "acc_char": 0.3037, + "acc_token": 0.6695, + "epoch": 0.02, + "loss_char": 0.7715, + "loss_token": 1.6237, + "lr": "2.00e-07", + "norm": 6.1823, + "step": 1195 + }, + { + "acc_char": 0.298, + "acc_token": 0.6762, + "epoch": 0.02, + "loss_char": 0.7422, + "loss_token": 1.5991, + "lr": "2.00e-07", + "norm": 5.8473, + "step": 1196 + }, + { + "acc_char": 0.3136, + "acc_token": 0.7075, + "epoch": 0.02, + "loss_char": 0.6773, + "loss_token": 1.5642, + "lr": "2.00e-07", + "norm": 5.6722, + "step": 1197 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6751, + "epoch": 0.02, + "loss_char": 0.7141, + "loss_token": 1.6704, + "lr": "2.00e-07", + "norm": 6.141, + "step": 1198 + }, + { + "acc_char": 0.296, + "acc_token": 0.6694, + "epoch": 0.02, + "loss_char": 0.7446, + "loss_token": 1.6256, + "lr": "2.00e-07", + "norm": 5.942, + "step": 1199 + }, + { + "acc_char": 0.2992, + "acc_token": 0.6891, + "epoch": 0.02, + "loss_char": 0.7081, + "loss_token": 1.5872, + "lr": "2.00e-07", + "norm": 5.6528, + "step": 1200 + }, + { + "acc_char": 0.3113, + "acc_token": 0.6944, + "epoch": 0.02, + "loss_char": 0.6927, + "loss_token": 1.5479, + "lr": "2.00e-07", + "norm": 5.574, + "step": 1201 + }, + { + "acc_char": 0.2949, + "acc_token": 0.6881, + "epoch": 0.02, + "loss_char": 0.6741, + "loss_token": 1.5658, + "lr": "2.00e-07", + "norm": 5.6061, + "step": 1202 + }, + { + "acc_char": 0.3162, + "acc_token": 0.7157, + "epoch": 0.02, + "loss_char": 0.6284, + "loss_token": 1.5231, + "lr": "2.00e-07", + "norm": 5.5183, + "step": 1203 + }, + { + "acc_char": 0.2949, + "acc_token": 0.6586, + "epoch": 0.02, + "loss_char": 0.7916, + "loss_token": 1.725, + "lr": "2.00e-07", + "norm": 5.9887, + "step": 1204 + }, + { + "acc_char": 0.3105, + "acc_token": 0.7052, + "epoch": 0.02, + "loss_char": 0.6655, + "loss_token": 1.6009, + "lr": "2.00e-07", + "norm": 5.7779, + "step": 1205 + }, + { + "acc_char": 0.3019, + "acc_token": 0.6792, + "epoch": 0.02, + "loss_char": 0.7376, + "loss_token": 1.5953, + "lr": "2.00e-07", + "norm": 5.8897, + "step": 1206 + }, + { + "acc_char": 0.3059, + "acc_token": 0.6879, + "epoch": 0.02, + "loss_char": 0.7213, + "loss_token": 1.5719, + "lr": "2.00e-07", + "norm": 5.6923, + "step": 1207 + }, + { + "acc_char": 0.2865, + "acc_token": 0.6294, + "epoch": 0.02, + "loss_char": 0.8252, + "loss_token": 1.7565, + "lr": "2.00e-07", + "norm": 5.7013, + "step": 1208 + }, + { + "acc_char": 0.3107, + "acc_token": 0.6958, + "epoch": 0.02, + "loss_char": 0.7085, + "loss_token": 1.6167, + "lr": "2.00e-07", + "norm": 5.6654, + "step": 1209 + }, + { + "acc_char": 0.3115, + "acc_token": 0.6773, + "epoch": 0.02, + "loss_char": 0.7292, + "loss_token": 1.5463, + "lr": "2.00e-07", + "norm": 5.4154, + "step": 1210 + }, + { + "acc_char": 0.3196, + "acc_token": 0.7027, + "epoch": 0.02, + "loss_char": 0.6707, + "loss_token": 1.5965, + "lr": "2.00e-07", + "norm": 6.0887, + "step": 1211 + }, + { + "acc_char": 0.3162, + "acc_token": 0.7024, + "epoch": 0.02, + "loss_char": 0.6739, + "loss_token": 1.5327, + "lr": "2.00e-07", + "norm": 5.9695, + "step": 1212 + }, + { + "acc_char": 0.3201, + "acc_token": 0.7112, + "epoch": 0.02, + "loss_char": 0.6603, + "loss_token": 1.4594, + "lr": "2.00e-07", + "norm": 5.4631, + "step": 1213 + }, + { + "acc_char": 0.3123, + "acc_token": 0.7183, + "epoch": 0.02, + "loss_char": 0.606, + "loss_token": 1.4774, + "lr": "2.00e-07", + "norm": 5.5688, + "step": 1214 + }, + { + "acc_char": 0.308, + "acc_token": 0.6938, + "epoch": 0.02, + "loss_char": 0.6814, + "loss_token": 1.525, + "lr": "2.00e-07", + "norm": 5.4292, + "step": 1215 + }, + { + "acc_char": 0.3054, + "acc_token": 0.6819, + "epoch": 0.02, + "loss_char": 0.7364, + "loss_token": 1.6021, + "lr": "2.00e-07", + "norm": 5.5374, + "step": 1216 + }, + { + "acc_char": 0.3228, + "acc_token": 0.7098, + "epoch": 0.02, + "loss_char": 0.6828, + "loss_token": 1.5927, + "lr": "2.00e-07", + "norm": 6.0024, + "step": 1217 + }, + { + "acc_char": 0.3128, + "acc_token": 0.69, + "epoch": 0.02, + "loss_char": 0.717, + "loss_token": 1.61, + "lr": "2.00e-07", + "norm": 5.756, + "step": 1218 + }, + { + "acc_char": 0.3006, + "acc_token": 0.684, + "epoch": 0.02, + "loss_char": 0.7142, + "loss_token": 1.6364, + "lr": "2.00e-07", + "norm": 5.6081, + "step": 1219 + }, + { + "acc_char": 0.2954, + "acc_token": 0.6756, + "epoch": 0.02, + "loss_char": 0.7117, + "loss_token": 1.5118, + "lr": "2.00e-07", + "norm": 5.7647, + "step": 1220 + }, + { + "acc_char": 0.3062, + "acc_token": 0.7, + "epoch": 0.02, + "loss_char": 0.6784, + "loss_token": 1.6177, + "lr": "2.00e-07", + "norm": 5.5468, + "step": 1221 + }, + { + "acc_char": 0.2938, + "acc_token": 0.6753, + "epoch": 0.02, + "loss_char": 0.7119, + "loss_token": 1.5672, + "lr": "2.00e-07", + "norm": 6.0324, + "step": 1222 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6842, + "epoch": 0.02, + "loss_char": 0.7241, + "loss_token": 1.6795, + "lr": "2.00e-07", + "norm": 6.0055, + "step": 1223 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6746, + "epoch": 0.02, + "loss_char": 0.7454, + "loss_token": 1.6286, + "lr": "2.00e-07", + "norm": 5.8116, + "step": 1224 + }, + { + "acc_char": 0.2895, + "acc_token": 0.6561, + "epoch": 0.02, + "loss_char": 0.7919, + "loss_token": 1.7589, + "lr": "2.00e-07", + "norm": 6.1568, + "step": 1225 + }, + { + "acc_char": 0.3112, + "acc_token": 0.6893, + "epoch": 0.02, + "loss_char": 0.7214, + "loss_token": 1.5514, + "lr": "2.00e-07", + "norm": 6.0475, + "step": 1226 + }, + { + "acc_char": 0.3006, + "acc_token": 0.6802, + "epoch": 0.02, + "loss_char": 0.7417, + "loss_token": 1.6405, + "lr": "2.00e-07", + "norm": 5.888, + "step": 1227 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6747, + "epoch": 0.02, + "loss_char": 0.7373, + "loss_token": 1.6025, + "lr": "2.00e-07", + "norm": 5.9288, + "step": 1228 + }, + { + "acc_char": 0.3079, + "acc_token": 0.6781, + "epoch": 0.02, + "loss_char": 0.7676, + "loss_token": 1.6012, + "lr": "2.00e-07", + "norm": 5.8617, + "step": 1229 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6851, + "epoch": 0.02, + "loss_char": 0.7017, + "loss_token": 1.6302, + "lr": "2.00e-07", + "norm": 5.8773, + "step": 1230 + }, + { + "acc_char": 0.2894, + "acc_token": 0.6596, + "epoch": 0.02, + "loss_char": 0.7961, + "loss_token": 1.6606, + "lr": "2.00e-07", + "norm": 5.7945, + "step": 1231 + }, + { + "acc_char": 0.3134, + "acc_token": 0.696, + "epoch": 0.02, + "loss_char": 0.7082, + "loss_token": 1.5876, + "lr": "2.00e-07", + "norm": 5.7545, + "step": 1232 + }, + { + "acc_char": 0.2867, + "acc_token": 0.6524, + "epoch": 0.02, + "loss_char": 0.7828, + "loss_token": 1.6832, + "lr": "2.00e-07", + "norm": 5.6264, + "step": 1233 + }, + { + "acc_char": 0.2902, + "acc_token": 0.6328, + "epoch": 0.02, + "loss_char": 0.913, + "loss_token": 1.7744, + "lr": "2.00e-07", + "norm": 6.6499, + "step": 1234 + }, + { + "acc_char": 0.315, + "acc_token": 0.7204, + "epoch": 0.02, + "loss_char": 0.623, + "loss_token": 1.422, + "lr": "2.00e-07", + "norm": 5.5104, + "step": 1235 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6784, + "epoch": 0.02, + "loss_char": 0.7408, + "loss_token": 1.6017, + "lr": "2.00e-07", + "norm": 2.732, + "step": 1236 + }, + { + "acc_char": 0.3078, + "acc_token": 0.6977, + "epoch": 0.02, + "loss_char": 0.6765, + "loss_token": 1.6248, + "lr": "2.00e-07", + "norm": 5.7629, + "step": 1237 + }, + { + "acc_char": 0.3089, + "acc_token": 0.7052, + "epoch": 0.02, + "loss_char": 0.6535, + "loss_token": 1.5843, + "lr": "2.00e-07", + "norm": 5.6524, + "step": 1238 + }, + { + "acc_char": 0.319, + "acc_token": 0.7262, + "epoch": 0.02, + "loss_char": 0.615, + "loss_token": 1.5164, + "lr": "2.00e-07", + "norm": 5.5803, + "step": 1239 + }, + { + "acc_char": 0.2951, + "acc_token": 0.6735, + "epoch": 0.02, + "loss_char": 0.7513, + "loss_token": 1.6776, + "lr": "2.00e-07", + "norm": 5.7461, + "step": 1240 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6742, + "epoch": 0.02, + "loss_char": 0.7417, + "loss_token": 1.6581, + "lr": "2.00e-07", + "norm": 5.9435, + "step": 1241 + }, + { + "acc_char": 0.2895, + "acc_token": 0.6463, + "epoch": 0.02, + "loss_char": 0.8473, + "loss_token": 1.8006, + "lr": "2.00e-07", + "norm": 6.0168, + "step": 1242 + }, + { + "acc_char": 0.3064, + "acc_token": 0.7035, + "epoch": 0.02, + "loss_char": 0.6319, + "loss_token": 1.5342, + "lr": "2.00e-07", + "norm": 5.755, + "step": 1243 + }, + { + "acc_char": 0.3018, + "acc_token": 0.6873, + "epoch": 0.02, + "loss_char": 0.6987, + "loss_token": 1.5935, + "lr": "2.00e-07", + "norm": 5.5453, + "step": 1244 + }, + { + "acc_char": 0.2961, + "acc_token": 0.6757, + "epoch": 0.02, + "loss_char": 0.7513, + "loss_token": 1.6462, + "lr": "2.00e-07", + "norm": 7.3667, + "step": 1245 + }, + { + "acc_char": 0.31, + "acc_token": 0.6903, + "epoch": 0.02, + "loss_char": 0.7214, + "loss_token": 1.5986, + "lr": "2.00e-07", + "norm": 5.6851, + "step": 1246 + }, + { + "acc_char": 0.3029, + "acc_token": 0.6882, + "epoch": 0.02, + "loss_char": 0.7119, + "loss_token": 1.6474, + "lr": "2.00e-07", + "norm": 5.829, + "step": 1247 + }, + { + "acc_char": 0.3095, + "acc_token": 0.7146, + "epoch": 0.02, + "loss_char": 0.6218, + "loss_token": 1.5106, + "lr": "2.00e-07", + "norm": 5.4342, + "step": 1248 + }, + { + "acc_char": 0.2975, + "acc_token": 0.6854, + "epoch": 0.02, + "loss_char": 0.6857, + "loss_token": 1.6246, + "lr": "2.00e-07", + "norm": 5.7823, + "step": 1249 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6944, + "epoch": 0.02, + "loss_char": 0.6883, + "loss_token": 1.5675, + "lr": "2.00e-07", + "norm": 5.4636, + "step": 1250 + }, + { + "acc_char": 0.3069, + "acc_token": 0.6922, + "epoch": 0.02, + "loss_char": 0.7031, + "loss_token": 1.5125, + "lr": "2.00e-07", + "norm": 5.4007, + "step": 1251 + }, + { + "acc_char": 0.3049, + "acc_token": 0.6812, + "epoch": 0.02, + "loss_char": 0.7204, + "loss_token": 1.5756, + "lr": "2.00e-07", + "norm": 5.9017, + "step": 1252 + }, + { + "acc_char": 0.3148, + "acc_token": 0.7214, + "epoch": 0.02, + "loss_char": 0.6013, + "loss_token": 1.4596, + "lr": "2.00e-07", + "norm": 5.4911, + "step": 1253 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6694, + "epoch": 0.02, + "loss_char": 0.7707, + "loss_token": 1.5119, + "lr": "2.00e-07", + "norm": 5.656, + "step": 1254 + }, + { + "acc_char": 0.3015, + "acc_token": 0.6944, + "epoch": 0.02, + "loss_char": 0.6839, + "loss_token": 1.6382, + "lr": "2.00e-07", + "norm": 5.6153, + "step": 1255 + }, + { + "acc_char": 0.3001, + "acc_token": 0.6474, + "epoch": 0.02, + "loss_char": 0.8154, + "loss_token": 1.6608, + "lr": "2.00e-07", + "norm": 5.5675, + "step": 1256 + }, + { + "acc_char": 0.3161, + "acc_token": 0.704, + "epoch": 0.02, + "loss_char": 0.6828, + "loss_token": 1.5635, + "lr": "2.00e-07", + "norm": 5.5283, + "step": 1257 + }, + { + "acc_char": 0.3108, + "acc_token": 0.6962, + "epoch": 0.02, + "loss_char": 0.6988, + "loss_token": 1.5846, + "lr": "2.00e-07", + "norm": 5.6018, + "step": 1258 + }, + { + "acc_char": 0.2944, + "acc_token": 0.6763, + "epoch": 0.02, + "loss_char": 0.7116, + "loss_token": 1.607, + "lr": "2.00e-07", + "norm": 5.6171, + "step": 1259 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6785, + "epoch": 0.02, + "loss_char": 0.7239, + "loss_token": 1.6913, + "lr": "2.00e-07", + "norm": 5.7759, + "step": 1260 + }, + { + "acc_char": 0.3042, + "acc_token": 0.6966, + "epoch": 0.02, + "loss_char": 0.6699, + "loss_token": 1.5636, + "lr": "2.00e-07", + "norm": 6.2067, + "step": 1261 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6694, + "epoch": 0.02, + "loss_char": 0.7628, + "loss_token": 1.6383, + "lr": "2.00e-07", + "norm": 5.6774, + "step": 1262 + }, + { + "acc_char": 0.2891, + "acc_token": 0.6604, + "epoch": 0.02, + "loss_char": 0.7724, + "loss_token": 1.6981, + "lr": "2.00e-07", + "norm": 5.9932, + "step": 1263 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6786, + "epoch": 0.02, + "loss_char": 0.7339, + "loss_token": 1.5778, + "lr": "2.00e-07", + "norm": 5.6281, + "step": 1264 + }, + { + "acc_char": 0.2989, + "acc_token": 0.6753, + "epoch": 0.02, + "loss_char": 0.7396, + "loss_token": 1.5632, + "lr": "2.00e-07", + "norm": 5.6482, + "step": 1265 + }, + { + "acc_char": 0.3088, + "acc_token": 0.7037, + "epoch": 0.02, + "loss_char": 0.6675, + "loss_token": 1.4307, + "lr": "2.00e-07", + "norm": 5.6762, + "step": 1266 + }, + { + "acc_char": 0.3097, + "acc_token": 0.7041, + "epoch": 0.02, + "loss_char": 0.6608, + "loss_token": 1.5348, + "lr": "2.00e-07", + "norm": 5.767, + "step": 1267 + }, + { + "acc_char": 0.2969, + "acc_token": 0.6725, + "epoch": 0.02, + "loss_char": 0.7345, + "loss_token": 1.68, + "lr": "2.00e-07", + "norm": 5.801, + "step": 1268 + }, + { + "acc_char": 0.3099, + "acc_token": 0.6968, + "epoch": 0.02, + "loss_char": 0.6917, + "loss_token": 1.5728, + "lr": "2.00e-07", + "norm": 5.5568, + "step": 1269 + }, + { + "acc_char": 0.3226, + "acc_token": 0.6871, + "epoch": 0.02, + "loss_char": 0.7358, + "loss_token": 1.5833, + "lr": "2.00e-07", + "norm": 5.5733, + "step": 1270 + }, + { + "acc_char": 0.3057, + "acc_token": 0.6873, + "epoch": 0.02, + "loss_char": 0.7212, + "loss_token": 1.669, + "lr": "2.00e-07", + "norm": 5.8983, + "step": 1271 + }, + { + "acc_char": 0.3064, + "acc_token": 0.6861, + "epoch": 0.02, + "loss_char": 0.7191, + "loss_token": 1.6987, + "lr": "2.00e-07", + "norm": 6.279, + "step": 1272 + }, + { + "acc_char": 0.2931, + "acc_token": 0.6685, + "epoch": 0.02, + "loss_char": 0.75, + "loss_token": 1.5613, + "lr": "2.00e-07", + "norm": 5.8721, + "step": 1273 + }, + { + "acc_char": 0.2968, + "acc_token": 0.6677, + "epoch": 0.02, + "loss_char": 0.7697, + "loss_token": 1.639, + "lr": "2.00e-07", + "norm": 5.499, + "step": 1274 + }, + { + "acc_char": 0.3112, + "acc_token": 0.6988, + "epoch": 0.02, + "loss_char": 0.6827, + "loss_token": 1.5999, + "lr": "2.00e-07", + "norm": 5.6463, + "step": 1275 + }, + { + "acc_char": 0.3173, + "acc_token": 0.7016, + "epoch": 0.02, + "loss_char": 0.6897, + "loss_token": 1.515, + "lr": "2.00e-07", + "norm": 5.4869, + "step": 1276 + }, + { + "acc_char": 0.2954, + "acc_token": 0.6773, + "epoch": 0.02, + "loss_char": 0.7179, + "loss_token": 1.6558, + "lr": "2.00e-07", + "norm": 5.7433, + "step": 1277 + }, + { + "acc_char": 0.3029, + "acc_token": 0.6768, + "epoch": 0.02, + "loss_char": 0.7316, + "loss_token": 1.5784, + "lr": "2.00e-07", + "norm": 5.8841, + "step": 1278 + }, + { + "acc_char": 0.2838, + "acc_token": 0.6648, + "epoch": 0.02, + "loss_char": 0.7332, + "loss_token": 1.621, + "lr": "2.00e-07", + "norm": 5.819, + "step": 1279 + }, + { + "acc_char": 0.309, + "acc_token": 0.6922, + "epoch": 0.02, + "loss_char": 0.6954, + "loss_token": 1.5557, + "lr": "2.00e-07", + "norm": 5.6622, + "step": 1280 + }, + { + "acc_char": 0.2977, + "acc_token": 0.6724, + "epoch": 0.02, + "loss_char": 0.7497, + "loss_token": 1.6469, + "lr": "2.00e-07", + "norm": 6.0357, + "step": 1281 + }, + { + "acc_char": 0.2991, + "acc_token": 0.693, + "epoch": 0.02, + "loss_char": 0.6675, + "loss_token": 1.5335, + "lr": "2.00e-07", + "norm": 5.398, + "step": 1282 + }, + { + "acc_char": 0.2906, + "acc_token": 0.6389, + "epoch": 0.02, + "loss_char": 0.8624, + "loss_token": 1.8026, + "lr": "2.00e-07", + "norm": 5.4403, + "step": 1283 + }, + { + "acc_char": 0.3027, + "acc_token": 0.6786, + "epoch": 0.02, + "loss_char": 0.7183, + "loss_token": 1.567, + "lr": "2.00e-07", + "norm": 5.4143, + "step": 1284 + }, + { + "acc_char": 0.2908, + "acc_token": 0.6395, + "epoch": 0.02, + "loss_char": 0.8086, + "loss_token": 1.671, + "lr": "2.00e-07", + "norm": 5.4947, + "step": 1285 + }, + { + "acc_char": 0.3081, + "acc_token": 0.6976, + "epoch": 0.02, + "loss_char": 0.674, + "loss_token": 1.5783, + "lr": "2.00e-07", + "norm": 5.4502, + "step": 1286 + }, + { + "acc_char": 0.2987, + "acc_token": 0.6731, + "epoch": 0.02, + "loss_char": 0.7479, + "loss_token": 1.6846, + "lr": "2.00e-07", + "norm": 5.959, + "step": 1287 + }, + { + "acc_char": 0.3125, + "acc_token": 0.6928, + "epoch": 0.02, + "loss_char": 0.7215, + "loss_token": 1.5817, + "lr": "2.00e-07", + "norm": 5.5225, + "step": 1288 + }, + { + "acc_char": 0.2924, + "acc_token": 0.6698, + "epoch": 0.02, + "loss_char": 0.7396, + "loss_token": 1.5918, + "lr": "2.00e-07", + "norm": 5.7961, + "step": 1289 + }, + { + "acc_char": 0.3046, + "acc_token": 0.6958, + "epoch": 0.02, + "loss_char": 0.6932, + "loss_token": 1.5588, + "lr": "2.00e-07", + "norm": 5.6947, + "step": 1290 + }, + { + "acc_char": 0.2926, + "acc_token": 0.6789, + "epoch": 0.02, + "loss_char": 0.6949, + "loss_token": 1.6479, + "lr": "2.00e-07", + "norm": 5.9985, + "step": 1291 + }, + { + "acc_char": 0.2909, + "acc_token": 0.6287, + "epoch": 0.02, + "loss_char": 0.8573, + "loss_token": 1.7134, + "lr": "2.00e-07", + "norm": 5.5973, + "step": 1292 + }, + { + "acc_char": 0.3081, + "acc_token": 0.702, + "epoch": 0.02, + "loss_char": 0.655, + "loss_token": 1.5874, + "lr": "2.00e-07", + "norm": 5.5553, + "step": 1293 + }, + { + "acc_char": 0.2935, + "acc_token": 0.6674, + "epoch": 0.02, + "loss_char": 0.7717, + "loss_token": 1.6123, + "lr": "2.00e-07", + "norm": 5.7581, + "step": 1294 + }, + { + "acc_char": 0.3015, + "acc_token": 0.6914, + "epoch": 0.02, + "loss_char": 0.6618, + "loss_token": 1.5497, + "lr": "2.00e-07", + "norm": 5.5086, + "step": 1295 + }, + { + "acc_char": 0.303, + "acc_token": 0.6814, + "epoch": 0.02, + "loss_char": 0.7289, + "loss_token": 1.6631, + "lr": "2.00e-07", + "norm": 5.679, + "step": 1296 + }, + { + "acc_char": 0.2966, + "acc_token": 0.682, + "epoch": 0.02, + "loss_char": 0.7135, + "loss_token": 1.5486, + "lr": "2.00e-07", + "norm": 5.9415, + "step": 1297 + }, + { + "acc_char": 0.3011, + "acc_token": 0.6693, + "epoch": 0.02, + "loss_char": 0.777, + "loss_token": 1.6275, + "lr": "2.00e-07", + "norm": 6.2361, + "step": 1298 + }, + { + "acc_char": 0.3191, + "acc_token": 0.706, + "epoch": 0.02, + "loss_char": 0.6841, + "loss_token": 1.5494, + "lr": "2.00e-07", + "norm": 5.7965, + "step": 1299 + }, + { + "acc_char": 0.3061, + "acc_token": 0.6798, + "epoch": 0.02, + "loss_char": 0.7659, + "loss_token": 1.6338, + "lr": "2.00e-07", + "norm": 5.727, + "step": 1300 + }, + { + "acc_char": 0.2989, + "acc_token": 0.6424, + "epoch": 0.02, + "loss_char": 0.8275, + "loss_token": 1.6732, + "lr": "2.00e-07", + "norm": 5.9103, + "step": 1301 + }, + { + "acc_char": 0.3073, + "acc_token": 0.6837, + "epoch": 0.02, + "loss_char": 0.7156, + "loss_token": 1.5905, + "lr": "2.00e-07", + "norm": 5.7508, + "step": 1302 + }, + { + "acc_char": 0.3092, + "acc_token": 0.7093, + "epoch": 0.02, + "loss_char": 0.6465, + "loss_token": 1.5352, + "lr": "2.00e-07", + "norm": 5.3712, + "step": 1303 + }, + { + "acc_char": 0.3046, + "acc_token": 0.6859, + "epoch": 0.02, + "loss_char": 0.7196, + "loss_token": 1.6089, + "lr": "2.00e-07", + "norm": 5.7123, + "step": 1304 + }, + { + "acc_char": 0.295, + "acc_token": 0.6642, + "epoch": 0.02, + "loss_char": 0.7776, + "loss_token": 1.6871, + "lr": "2.00e-07", + "norm": 5.9281, + "step": 1305 + }, + { + "acc_char": 0.3124, + "acc_token": 0.7126, + "epoch": 0.02, + "loss_char": 0.6216, + "loss_token": 1.4293, + "lr": "2.00e-07", + "norm": 5.403, + "step": 1306 + }, + { + "acc_char": 0.2982, + "acc_token": 0.6654, + "epoch": 0.02, + "loss_char": 0.7713, + "loss_token": 1.7133, + "lr": "2.00e-07", + "norm": 6.1354, + "step": 1307 + }, + { + "acc_char": 0.2972, + "acc_token": 0.6834, + "epoch": 0.02, + "loss_char": 0.7293, + "loss_token": 1.5969, + "lr": "2.00e-07", + "norm": 5.713, + "step": 1308 + }, + { + "acc_char": 0.3121, + "acc_token": 0.6991, + "epoch": 0.02, + "loss_char": 0.6729, + "loss_token": 1.5578, + "lr": "2.00e-07", + "norm": 5.6073, + "step": 1309 + }, + { + "acc_char": 0.3035, + "acc_token": 0.6826, + "epoch": 0.02, + "loss_char": 0.7313, + "loss_token": 1.647, + "lr": "2.00e-07", + "norm": 5.9066, + "step": 1310 + }, + { + "acc_char": 0.3092, + "acc_token": 0.6894, + "epoch": 0.02, + "loss_char": 0.712, + "loss_token": 1.5379, + "lr": "2.00e-07", + "norm": 5.5333, + "step": 1311 + }, + { + "acc_char": 0.3037, + "acc_token": 0.6957, + "epoch": 0.02, + "loss_char": 0.6818, + "loss_token": 1.5175, + "lr": "2.00e-07", + "norm": 6.0989, + "step": 1312 + }, + { + "acc_char": 0.3087, + "acc_token": 0.6977, + "epoch": 0.02, + "loss_char": 0.6731, + "loss_token": 1.5837, + "lr": "2.00e-07", + "norm": 5.8096, + "step": 1313 + }, + { + "acc_char": 0.3152, + "acc_token": 0.7096, + "epoch": 0.02, + "loss_char": 0.6565, + "loss_token": 1.5564, + "lr": "2.00e-07", + "norm": 5.4834, + "step": 1314 + }, + { + "acc_char": 0.311, + "acc_token": 0.7085, + "epoch": 0.02, + "loss_char": 0.6535, + "loss_token": 1.5775, + "lr": "2.00e-07", + "norm": 5.5097, + "step": 1315 + }, + { + "acc_char": 0.2956, + "acc_token": 0.6631, + "epoch": 0.02, + "loss_char": 0.7923, + "loss_token": 1.713, + "lr": "2.00e-07", + "norm": 5.9354, + "step": 1316 + }, + { + "acc_char": 0.3073, + "acc_token": 0.6969, + "epoch": 0.02, + "loss_char": 0.6925, + "loss_token": 1.6065, + "lr": "2.00e-07", + "norm": 6.1533, + "step": 1317 + }, + { + "acc_char": 0.3259, + "acc_token": 0.7287, + "epoch": 0.02, + "loss_char": 0.5966, + "loss_token": 1.4401, + "lr": "2.00e-07", + "norm": 5.7195, + "step": 1318 + }, + { + "acc_char": 0.3141, + "acc_token": 0.7055, + "epoch": 0.02, + "loss_char": 0.6725, + "loss_token": 1.5512, + "lr": "2.00e-07", + "norm": 5.7712, + "step": 1319 + }, + { + "acc_char": 0.3002, + "acc_token": 0.6793, + "epoch": 0.02, + "loss_char": 0.7412, + "loss_token": 1.587, + "lr": "2.00e-07", + "norm": 5.6212, + "step": 1320 + }, + { + "acc_char": 0.3085, + "acc_token": 0.699, + "epoch": 0.02, + "loss_char": 0.6889, + "loss_token": 1.6217, + "lr": "2.00e-07", + "norm": 5.915, + "step": 1321 + }, + { + "acc_char": 0.3151, + "acc_token": 0.6983, + "epoch": 0.02, + "loss_char": 0.7024, + "loss_token": 1.581, + "lr": "2.00e-07", + "norm": 5.8306, + "step": 1322 + }, + { + "acc_char": 0.3029, + "acc_token": 0.6853, + "epoch": 0.02, + "loss_char": 0.7054, + "loss_token": 1.6941, + "lr": "2.00e-07", + "norm": 5.7394, + "step": 1323 + }, + { + "acc_char": 0.3063, + "acc_token": 0.706, + "epoch": 0.02, + "loss_char": 0.6812, + "loss_token": 1.5992, + "lr": "2.00e-07", + "norm": 5.8773, + "step": 1324 + }, + { + "acc_char": 0.3093, + "acc_token": 0.6971, + "epoch": 0.02, + "loss_char": 0.6921, + "loss_token": 1.5585, + "lr": "2.00e-07", + "norm": 5.6204, + "step": 1325 + }, + { + "acc_char": 0.2947, + "acc_token": 0.6644, + "epoch": 0.02, + "loss_char": 0.7743, + "loss_token": 1.6958, + "lr": "2.00e-07", + "norm": 5.856, + "step": 1326 + }, + { + "acc_char": 0.3062, + "acc_token": 0.6977, + "epoch": 0.02, + "loss_char": 0.6612, + "loss_token": 1.5455, + "lr": "2.00e-07", + "norm": 5.5659, + "step": 1327 + }, + { + "acc_char": 0.3122, + "acc_token": 0.6991, + "epoch": 0.02, + "loss_char": 0.6977, + "loss_token": 1.5435, + "lr": "2.00e-07", + "norm": 5.9666, + "step": 1328 + }, + { + "acc_char": 0.2953, + "acc_token": 0.6732, + "epoch": 0.02, + "loss_char": 0.7336, + "loss_token": 1.7345, + "lr": "2.00e-07", + "norm": 5.6002, + "step": 1329 + }, + { + "acc_char": 0.3132, + "acc_token": 0.7081, + "epoch": 0.02, + "loss_char": 0.6456, + "loss_token": 1.5861, + "lr": "2.00e-07", + "norm": 5.6949, + "step": 1330 + }, + { + "acc_char": 0.302, + "acc_token": 0.6938, + "epoch": 0.02, + "loss_char": 0.6581, + "loss_token": 1.5993, + "lr": "2.00e-07", + "norm": 6.3812, + "step": 1331 + }, + { + "acc_char": 0.3111, + "acc_token": 0.7023, + "epoch": 0.02, + "loss_char": 0.6768, + "loss_token": 1.5597, + "lr": "2.00e-07", + "norm": 5.5679, + "step": 1332 + }, + { + "acc_char": 0.3062, + "acc_token": 0.6841, + "epoch": 0.02, + "loss_char": 0.7264, + "loss_token": 1.6529, + "lr": "2.00e-07", + "norm": 5.9286, + "step": 1333 + }, + { + "acc_char": 0.3059, + "acc_token": 0.7084, + "epoch": 0.02, + "loss_char": 0.6333, + "loss_token": 1.492, + "lr": "2.00e-07", + "norm": 5.4609, + "step": 1334 + }, + { + "acc_char": 0.3002, + "acc_token": 0.6916, + "epoch": 0.02, + "loss_char": 0.666, + "loss_token": 1.5257, + "lr": "2.00e-07", + "norm": 5.5032, + "step": 1335 + }, + { + "acc_char": 0.3002, + "acc_token": 0.6851, + "epoch": 0.02, + "loss_char": 0.715, + "loss_token": 1.669, + "lr": "2.00e-07", + "norm": 5.89, + "step": 1336 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6808, + "epoch": 0.02, + "loss_char": 0.726, + "loss_token": 1.6113, + "lr": "2.00e-07", + "norm": 5.6423, + "step": 1337 + }, + { + "acc_char": 0.301, + "acc_token": 0.6771, + "epoch": 0.02, + "loss_char": 0.736, + "loss_token": 1.6483, + "lr": "2.00e-07", + "norm": 6.1082, + "step": 1338 + }, + { + "acc_char": 0.2952, + "acc_token": 0.6747, + "epoch": 0.02, + "loss_char": 0.769, + "loss_token": 1.6764, + "lr": "2.00e-07", + "norm": 5.9451, + "step": 1339 + }, + { + "acc_char": 0.2922, + "acc_token": 0.667, + "epoch": 0.02, + "loss_char": 0.7571, + "loss_token": 1.6656, + "lr": "2.00e-07", + "norm": 5.824, + "step": 1340 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6644, + "epoch": 0.02, + "loss_char": 0.8147, + "loss_token": 1.7101, + "lr": "2.00e-07", + "norm": 5.9317, + "step": 1341 + }, + { + "acc_char": 0.2979, + "acc_token": 0.6884, + "epoch": 0.02, + "loss_char": 0.7192, + "loss_token": 1.6335, + "lr": "2.00e-07", + "norm": 5.9221, + "step": 1342 + }, + { + "acc_char": 0.3071, + "acc_token": 0.6797, + "epoch": 0.02, + "loss_char": 0.7358, + "loss_token": 1.6014, + "lr": "2.00e-07", + "norm": 5.824, + "step": 1343 + }, + { + "acc_char": 0.3119, + "acc_token": 0.6764, + "epoch": 0.02, + "loss_char": 0.7539, + "loss_token": 1.7648, + "lr": "2.00e-07", + "norm": 6.1267, + "step": 1344 + }, + { + "acc_char": 0.2983, + "acc_token": 0.6722, + "epoch": 0.02, + "loss_char": 0.7617, + "loss_token": 1.63, + "lr": "2.00e-07", + "norm": 5.8447, + "step": 1345 + }, + { + "acc_char": 0.3093, + "acc_token": 0.7008, + "epoch": 0.02, + "loss_char": 0.6767, + "loss_token": 1.5799, + "lr": "2.00e-07", + "norm": 5.5396, + "step": 1346 + }, + { + "acc_char": 0.2933, + "acc_token": 0.6702, + "epoch": 0.02, + "loss_char": 0.7454, + "loss_token": 1.6384, + "lr": "2.00e-07", + "norm": 5.709, + "step": 1347 + }, + { + "acc_char": 0.2914, + "acc_token": 0.6709, + "epoch": 0.02, + "loss_char": 0.7337, + "loss_token": 1.592, + "lr": "2.00e-07", + "norm": 5.3753, + "step": 1348 + }, + { + "acc_char": 0.3056, + "acc_token": 0.6938, + "epoch": 0.02, + "loss_char": 0.6961, + "loss_token": 1.6395, + "lr": "2.00e-07", + "norm": 5.6499, + "step": 1349 + }, + { + "acc_char": 0.3064, + "acc_token": 0.677, + "epoch": 0.02, + "loss_char": 0.7697, + "loss_token": 1.7278, + "lr": "2.00e-07", + "norm": 6.3553, + "step": 1350 + }, + { + "acc_char": 0.3031, + "acc_token": 0.669, + "epoch": 0.02, + "loss_char": 0.8016, + "loss_token": 1.6509, + "lr": "2.00e-07", + "norm": 6.1227, + "step": 1351 + }, + { + "acc_char": 0.2973, + "acc_token": 0.6812, + "epoch": 0.02, + "loss_char": 0.7068, + "loss_token": 1.6572, + "lr": "2.00e-07", + "norm": 5.6062, + "step": 1352 + }, + { + "acc_char": 0.3016, + "acc_token": 0.7348, + "epoch": 0.02, + "loss_char": 0.545, + "loss_token": 1.4678, + "lr": "2.00e-07", + "norm": 5.5256, + "step": 1353 + }, + { + "acc_char": 0.2859, + "acc_token": 0.6595, + "epoch": 0.02, + "loss_char": 0.7825, + "loss_token": 1.7206, + "lr": "2.00e-07", + "norm": 5.6409, + "step": 1354 + }, + { + "acc_char": 0.3046, + "acc_token": 0.6902, + "epoch": 0.02, + "loss_char": 0.696, + "loss_token": 1.6006, + "lr": "2.00e-07", + "norm": 6.1147, + "step": 1355 + }, + { + "acc_char": 0.3, + "acc_token": 0.666, + "epoch": 0.02, + "loss_char": 0.805, + "loss_token": 1.7478, + "lr": "2.00e-07", + "norm": 5.9633, + "step": 1356 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6793, + "epoch": 0.02, + "loss_char": 0.7234, + "loss_token": 1.6125, + "lr": "2.00e-07", + "norm": 5.8055, + "step": 1357 + }, + { + "acc_char": 0.3124, + "acc_token": 0.704, + "epoch": 0.02, + "loss_char": 0.6558, + "loss_token": 1.5236, + "lr": "2.00e-07", + "norm": 5.401, + "step": 1358 + }, + { + "acc_char": 0.3125, + "acc_token": 0.7084, + "epoch": 0.02, + "loss_char": 0.652, + "loss_token": 1.5135, + "lr": "2.00e-07", + "norm": 5.7235, + "step": 1359 + }, + { + "acc_char": 0.3183, + "acc_token": 0.7207, + "epoch": 0.02, + "loss_char": 0.6271, + "loss_token": 1.503, + "lr": "2.00e-07", + "norm": 5.5718, + "step": 1360 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6753, + "epoch": 0.02, + "loss_char": 0.7615, + "loss_token": 1.6756, + "lr": "2.00e-07", + "norm": 5.9743, + "step": 1361 + }, + { + "acc_char": 0.3221, + "acc_token": 0.7169, + "epoch": 0.02, + "loss_char": 0.6669, + "loss_token": 1.5328, + "lr": "2.00e-07", + "norm": 5.9282, + "step": 1362 + }, + { + "acc_char": 0.2839, + "acc_token": 0.6536, + "epoch": 0.02, + "loss_char": 0.7904, + "loss_token": 1.7676, + "lr": "2.00e-07", + "norm": 6.4369, + "step": 1363 + }, + { + "acc_char": 0.3128, + "acc_token": 0.6961, + "epoch": 0.02, + "loss_char": 0.6898, + "loss_token": 1.542, + "lr": "2.00e-07", + "norm": 5.5994, + "step": 1364 + }, + { + "acc_char": 0.2969, + "acc_token": 0.6742, + "epoch": 0.02, + "loss_char": 0.7332, + "loss_token": 1.6879, + "lr": "2.00e-07", + "norm": 6.1954, + "step": 1365 + }, + { + "acc_char": 0.2925, + "acc_token": 0.6585, + "epoch": 0.03, + "loss_char": 0.7898, + "loss_token": 1.7, + "lr": "2.00e-07", + "norm": 6.0185, + "step": 1366 + }, + { + "acc_char": 0.2989, + "acc_token": 0.6691, + "epoch": 0.03, + "loss_char": 0.756, + "loss_token": 1.6255, + "lr": "2.00e-07", + "norm": 6.3259, + "step": 1367 + }, + { + "acc_char": 0.3114, + "acc_token": 0.7018, + "epoch": 0.03, + "loss_char": 0.6807, + "loss_token": 1.669, + "lr": "2.00e-07", + "norm": 5.9932, + "step": 1368 + }, + { + "acc_char": 0.2956, + "acc_token": 0.6205, + "epoch": 0.03, + "loss_char": 0.7947, + "loss_token": 1.6081, + "lr": "2.00e-07", + "norm": 5.4512, + "step": 1369 + }, + { + "acc_char": 0.3231, + "acc_token": 0.7104, + "epoch": 0.03, + "loss_char": 0.672, + "loss_token": 1.5403, + "lr": "2.00e-07", + "norm": 6.2026, + "step": 1370 + }, + { + "acc_char": 0.3035, + "acc_token": 0.6891, + "epoch": 0.03, + "loss_char": 0.7086, + "loss_token": 1.5575, + "lr": "2.00e-07", + "norm": 5.5939, + "step": 1371 + }, + { + "acc_char": 0.3126, + "acc_token": 0.7002, + "epoch": 0.03, + "loss_char": 0.7236, + "loss_token": 1.6196, + "lr": "2.00e-07", + "norm": 5.4226, + "step": 1372 + }, + { + "acc_char": 0.3135, + "acc_token": 0.7063, + "epoch": 0.03, + "loss_char": 0.6697, + "loss_token": 1.4176, + "lr": "2.00e-07", + "norm": 5.5692, + "step": 1373 + }, + { + "acc_char": 0.3084, + "acc_token": 0.6858, + "epoch": 0.03, + "loss_char": 0.7254, + "loss_token": 1.6299, + "lr": "2.00e-07", + "norm": 5.6955, + "step": 1374 + }, + { + "acc_char": 0.2914, + "acc_token": 0.6656, + "epoch": 0.03, + "loss_char": 0.7616, + "loss_token": 1.6862, + "lr": "2.00e-07", + "norm": 6.1186, + "step": 1375 + }, + { + "acc_char": 0.303, + "acc_token": 0.6843, + "epoch": 0.03, + "loss_char": 0.7165, + "loss_token": 1.5999, + "lr": "2.00e-07", + "norm": 6.0434, + "step": 1376 + }, + { + "acc_char": 0.305, + "acc_token": 0.6946, + "epoch": 0.03, + "loss_char": 0.6948, + "loss_token": 1.5296, + "lr": "2.00e-07", + "norm": 5.7242, + "step": 1377 + }, + { + "acc_char": 0.312, + "acc_token": 0.6938, + "epoch": 0.03, + "loss_char": 0.717, + "loss_token": 1.5859, + "lr": "2.00e-07", + "norm": 5.6534, + "step": 1378 + }, + { + "acc_char": 0.2975, + "acc_token": 0.6662, + "epoch": 0.03, + "loss_char": 0.7601, + "loss_token": 1.638, + "lr": "2.00e-07", + "norm": 6.0556, + "step": 1379 + }, + { + "acc_char": 0.302, + "acc_token": 0.677, + "epoch": 0.03, + "loss_char": 0.7621, + "loss_token": 1.6764, + "lr": "2.00e-07", + "norm": 5.7392, + "step": 1380 + }, + { + "acc_char": 0.3123, + "acc_token": 0.7012, + "epoch": 0.03, + "loss_char": 0.6862, + "loss_token": 1.6282, + "lr": "2.00e-07", + "norm": 5.8201, + "step": 1381 + }, + { + "acc_char": 0.3136, + "acc_token": 0.7041, + "epoch": 0.03, + "loss_char": 0.6634, + "loss_token": 1.4637, + "lr": "2.00e-07", + "norm": 5.4737, + "step": 1382 + }, + { + "acc_char": 0.3047, + "acc_token": 0.6831, + "epoch": 0.03, + "loss_char": 0.7225, + "loss_token": 1.6879, + "lr": "2.00e-07", + "norm": 5.8436, + "step": 1383 + }, + { + "acc_char": 0.3059, + "acc_token": 0.6894, + "epoch": 0.03, + "loss_char": 0.7192, + "loss_token": 1.633, + "lr": "2.00e-07", + "norm": 5.9087, + "step": 1384 + }, + { + "acc_char": 0.3098, + "acc_token": 0.7056, + "epoch": 0.03, + "loss_char": 0.6613, + "loss_token": 1.4887, + "lr": "2.00e-07", + "norm": 5.5427, + "step": 1385 + }, + { + "acc_char": 0.2913, + "acc_token": 0.6533, + "epoch": 0.03, + "loss_char": 0.8128, + "loss_token": 1.6987, + "lr": "2.00e-07", + "norm": 5.9285, + "step": 1386 + }, + { + "acc_char": 0.306, + "acc_token": 0.6957, + "epoch": 0.03, + "loss_char": 0.6774, + "loss_token": 1.5901, + "lr": "2.00e-07", + "norm": 5.6079, + "step": 1387 + }, + { + "acc_char": 0.3, + "acc_token": 0.6777, + "epoch": 0.03, + "loss_char": 0.7392, + "loss_token": 1.6184, + "lr": "2.00e-07", + "norm": 5.5041, + "step": 1388 + }, + { + "acc_char": 0.3064, + "acc_token": 0.6982, + "epoch": 0.03, + "loss_char": 0.6812, + "loss_token": 1.6384, + "lr": "2.00e-07", + "norm": 6.0128, + "step": 1389 + }, + { + "acc_char": 0.2931, + "acc_token": 0.6757, + "epoch": 0.03, + "loss_char": 0.7242, + "loss_token": 1.5323, + "lr": "2.00e-07", + "norm": 5.6286, + "step": 1390 + }, + { + "acc_char": 0.3113, + "acc_token": 0.6916, + "epoch": 0.03, + "loss_char": 0.7159, + "loss_token": 1.6158, + "lr": "2.00e-07", + "norm": 5.7847, + "step": 1391 + }, + { + "acc_char": 0.3085, + "acc_token": 0.7056, + "epoch": 0.03, + "loss_char": 0.6436, + "loss_token": 1.476, + "lr": "2.00e-07", + "norm": 5.8416, + "step": 1392 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6734, + "epoch": 0.03, + "loss_char": 0.7523, + "loss_token": 1.5721, + "lr": "2.00e-07", + "norm": 5.6675, + "step": 1393 + }, + { + "acc_char": 0.3048, + "acc_token": 0.6848, + "epoch": 0.03, + "loss_char": 0.7287, + "loss_token": 1.5632, + "lr": "2.00e-07", + "norm": 5.4626, + "step": 1394 + }, + { + "acc_char": 0.3077, + "acc_token": 0.6963, + "epoch": 0.03, + "loss_char": 0.6829, + "loss_token": 1.667, + "lr": "2.00e-07", + "norm": 5.6939, + "step": 1395 + }, + { + "acc_char": 0.3158, + "acc_token": 0.7124, + "epoch": 0.03, + "loss_char": 0.6541, + "loss_token": 1.6637, + "lr": "2.00e-07", + "norm": 5.609, + "step": 1396 + }, + { + "acc_char": 0.3019, + "acc_token": 0.6927, + "epoch": 0.03, + "loss_char": 0.6862, + "loss_token": 1.5703, + "lr": "2.00e-07", + "norm": 5.6961, + "step": 1397 + }, + { + "acc_char": 0.3018, + "acc_token": 0.6779, + "epoch": 0.03, + "loss_char": 0.723, + "loss_token": 1.6526, + "lr": "2.00e-07", + "norm": 5.8147, + "step": 1398 + }, + { + "acc_char": 0.2908, + "acc_token": 0.6306, + "epoch": 0.03, + "loss_char": 0.8343, + "loss_token": 1.6166, + "lr": "2.00e-07", + "norm": 5.3458, + "step": 1399 + }, + { + "acc_char": 0.3062, + "acc_token": 0.7036, + "epoch": 0.03, + "loss_char": 0.6465, + "loss_token": 1.5848, + "lr": "2.00e-07", + "norm": 5.6703, + "step": 1400 + }, + { + "acc_char": 0.3115, + "acc_token": 0.7026, + "epoch": 0.03, + "loss_char": 0.6719, + "loss_token": 1.5331, + "lr": "2.00e-07", + "norm": 5.415, + "step": 1401 + }, + { + "acc_char": 0.305, + "acc_token": 0.6994, + "epoch": 0.03, + "loss_char": 0.6684, + "loss_token": 1.6298, + "lr": "2.00e-07", + "norm": 5.7916, + "step": 1402 + }, + { + "acc_char": 0.3037, + "acc_token": 0.6807, + "epoch": 0.03, + "loss_char": 0.746, + "loss_token": 1.5851, + "lr": "2.00e-07", + "norm": 5.7043, + "step": 1403 + }, + { + "acc_char": 0.3037, + "acc_token": 0.6985, + "epoch": 0.03, + "loss_char": 0.6662, + "loss_token": 1.5652, + "lr": "2.00e-07", + "norm": 5.7722, + "step": 1404 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6844, + "epoch": 0.03, + "loss_char": 0.7001, + "loss_token": 1.5728, + "lr": "2.00e-07", + "norm": 5.6028, + "step": 1405 + }, + { + "acc_char": 0.292, + "acc_token": 0.6601, + "epoch": 0.03, + "loss_char": 0.8013, + "loss_token": 1.6926, + "lr": "2.00e-07", + "norm": 6.0242, + "step": 1406 + }, + { + "acc_char": 0.3058, + "acc_token": 0.698, + "epoch": 0.03, + "loss_char": 0.6768, + "loss_token": 1.5712, + "lr": "2.00e-07", + "norm": 5.611, + "step": 1407 + }, + { + "acc_char": 0.3038, + "acc_token": 0.6904, + "epoch": 0.03, + "loss_char": 0.6899, + "loss_token": 1.624, + "lr": "2.00e-07", + "norm": 6.0308, + "step": 1408 + }, + { + "acc_char": 0.2939, + "acc_token": 0.669, + "epoch": 0.03, + "loss_char": 0.748, + "loss_token": 1.6023, + "lr": "2.00e-07", + "norm": 5.6687, + "step": 1409 + }, + { + "acc_char": 0.2921, + "acc_token": 0.669, + "epoch": 0.03, + "loss_char": 0.7351, + "loss_token": 1.6203, + "lr": "2.00e-07", + "norm": 5.5791, + "step": 1410 + }, + { + "acc_char": 0.2854, + "acc_token": 0.6869, + "epoch": 0.03, + "loss_char": 0.6622, + "loss_token": 1.6635, + "lr": "2.00e-07", + "norm": 7.0393, + "step": 1411 + }, + { + "acc_char": 0.3082, + "acc_token": 0.6844, + "epoch": 0.03, + "loss_char": 0.7358, + "loss_token": 1.6045, + "lr": "2.00e-07", + "norm": 5.7435, + "step": 1412 + }, + { + "acc_char": 0.3065, + "acc_token": 0.7006, + "epoch": 0.03, + "loss_char": 0.6737, + "loss_token": 1.5908, + "lr": "2.00e-07", + "norm": 5.6507, + "step": 1413 + }, + { + "acc_char": 0.3041, + "acc_token": 0.6997, + "epoch": 0.03, + "loss_char": 0.6748, + "loss_token": 1.5888, + "lr": "2.00e-07", + "norm": 5.6674, + "step": 1414 + }, + { + "acc_char": 0.3062, + "acc_token": 0.6914, + "epoch": 0.03, + "loss_char": 0.7032, + "loss_token": 1.5616, + "lr": "2.00e-07", + "norm": 5.8995, + "step": 1415 + }, + { + "acc_char": 0.291, + "acc_token": 0.6312, + "epoch": 0.03, + "loss_char": 0.86, + "loss_token": 1.7505, + "lr": "2.00e-07", + "norm": 5.5878, + "step": 1416 + }, + { + "acc_char": 0.3097, + "acc_token": 0.6963, + "epoch": 0.03, + "loss_char": 0.672, + "loss_token": 1.5904, + "lr": "2.00e-07", + "norm": 5.6347, + "step": 1417 + }, + { + "acc_char": 0.3116, + "acc_token": 0.6902, + "epoch": 0.03, + "loss_char": 0.7182, + "loss_token": 1.572, + "lr": "2.00e-07", + "norm": 5.7473, + "step": 1418 + }, + { + "acc_char": 0.3138, + "acc_token": 0.7077, + "epoch": 0.03, + "loss_char": 0.677, + "loss_token": 1.5683, + "lr": "2.00e-07", + "norm": 5.97, + "step": 1419 + }, + { + "acc_char": 0.3232, + "acc_token": 0.7237, + "epoch": 0.03, + "loss_char": 0.6442, + "loss_token": 1.585, + "lr": "2.00e-07", + "norm": 6.0055, + "step": 1420 + }, + { + "acc_char": 0.3097, + "acc_token": 0.6964, + "epoch": 0.03, + "loss_char": 0.6874, + "loss_token": 1.5741, + "lr": "2.00e-07", + "norm": 5.5152, + "step": 1421 + }, + { + "acc_char": 0.3156, + "acc_token": 0.7126, + "epoch": 0.03, + "loss_char": 0.6461, + "loss_token": 1.4811, + "lr": "2.00e-07", + "norm": 5.4544, + "step": 1422 + }, + { + "acc_char": 0.3079, + "acc_token": 0.704, + "epoch": 0.03, + "loss_char": 0.6918, + "loss_token": 1.6122, + "lr": "2.00e-07", + "norm": 5.8757, + "step": 1423 + }, + { + "acc_char": 0.3054, + "acc_token": 0.6862, + "epoch": 0.03, + "loss_char": 0.7286, + "loss_token": 1.595, + "lr": "2.00e-07", + "norm": 5.8165, + "step": 1424 + }, + { + "acc_char": 0.3133, + "acc_token": 0.6962, + "epoch": 0.03, + "loss_char": 0.6887, + "loss_token": 1.6077, + "lr": "2.00e-07", + "norm": 6.1779, + "step": 1425 + }, + { + "acc_char": 0.312, + "acc_token": 0.7053, + "epoch": 0.03, + "loss_char": 0.6846, + "loss_token": 1.5866, + "lr": "2.00e-07", + "norm": 5.7713, + "step": 1426 + }, + { + "acc_char": 0.2996, + "acc_token": 0.6674, + "epoch": 0.03, + "loss_char": 0.787, + "loss_token": 1.6678, + "lr": "2.00e-07", + "norm": 5.5794, + "step": 1427 + }, + { + "acc_char": 0.2895, + "acc_token": 0.6244, + "epoch": 0.03, + "loss_char": 0.8862, + "loss_token": 1.8093, + "lr": "2.00e-07", + "norm": 5.699, + "step": 1428 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6784, + "epoch": 0.03, + "loss_char": 0.7447, + "loss_token": 1.6302, + "lr": "2.00e-07", + "norm": 5.6967, + "step": 1429 + }, + { + "acc_char": 0.3022, + "acc_token": 0.6747, + "epoch": 0.03, + "loss_char": 0.7457, + "loss_token": 1.6382, + "lr": "2.00e-07", + "norm": 5.7382, + "step": 1430 + }, + { + "acc_char": 0.2978, + "acc_token": 0.6589, + "epoch": 0.03, + "loss_char": 0.8084, + "loss_token": 1.668, + "lr": "2.00e-07", + "norm": 6.0445, + "step": 1431 + }, + { + "acc_char": 0.3157, + "acc_token": 0.7066, + "epoch": 0.03, + "loss_char": 0.6527, + "loss_token": 1.5359, + "lr": "2.00e-07", + "norm": 5.8243, + "step": 1432 + }, + { + "acc_char": 0.312, + "acc_token": 0.6989, + "epoch": 0.03, + "loss_char": 0.6841, + "loss_token": 1.4395, + "lr": "2.00e-07", + "norm": 5.8681, + "step": 1433 + }, + { + "acc_char": 0.3123, + "acc_token": 0.7054, + "epoch": 0.03, + "loss_char": 0.6706, + "loss_token": 1.5895, + "lr": "2.00e-07", + "norm": 6.5931, + "step": 1434 + }, + { + "acc_char": 0.3129, + "acc_token": 0.7109, + "epoch": 0.03, + "loss_char": 0.642, + "loss_token": 1.5843, + "lr": "2.00e-07", + "norm": 5.7947, + "step": 1435 + }, + { + "acc_char": 0.3108, + "acc_token": 0.7093, + "epoch": 0.03, + "loss_char": 0.646, + "loss_token": 1.552, + "lr": "2.00e-07", + "norm": 5.4622, + "step": 1436 + }, + { + "acc_char": 0.2952, + "acc_token": 0.6775, + "epoch": 0.03, + "loss_char": 0.7064, + "loss_token": 1.6145, + "lr": "2.00e-07", + "norm": 5.5288, + "step": 1437 + }, + { + "acc_char": 0.3134, + "acc_token": 0.6921, + "epoch": 0.03, + "loss_char": 0.6998, + "loss_token": 1.5238, + "lr": "2.00e-07", + "norm": 5.704, + "step": 1438 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6891, + "epoch": 0.03, + "loss_char": 0.6862, + "loss_token": 1.6609, + "lr": "2.00e-07", + "norm": 5.6822, + "step": 1439 + }, + { + "acc_char": 0.3086, + "acc_token": 0.7125, + "epoch": 0.03, + "loss_char": 0.6314, + "loss_token": 1.5158, + "lr": "2.00e-07", + "norm": 5.4196, + "step": 1440 + }, + { + "acc_char": 0.3088, + "acc_token": 0.7033, + "epoch": 0.03, + "loss_char": 0.6497, + "loss_token": 1.5208, + "lr": "2.00e-07", + "norm": 5.9641, + "step": 1441 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6797, + "epoch": 0.03, + "loss_char": 0.733, + "loss_token": 1.6274, + "lr": "2.00e-07", + "norm": 5.7354, + "step": 1442 + }, + { + "acc_char": 0.3165, + "acc_token": 0.7008, + "epoch": 0.03, + "loss_char": 0.6918, + "loss_token": 1.591, + "lr": "2.00e-07", + "norm": 5.808, + "step": 1443 + }, + { + "acc_char": 0.3011, + "acc_token": 0.674, + "epoch": 0.03, + "loss_char": 0.7647, + "loss_token": 1.7132, + "lr": "2.00e-07", + "norm": 5.7308, + "step": 1444 + }, + { + "acc_char": 0.3117, + "acc_token": 0.7057, + "epoch": 0.03, + "loss_char": 0.6518, + "loss_token": 1.6234, + "lr": "2.00e-07", + "norm": 5.8338, + "step": 1445 + }, + { + "acc_char": 0.3148, + "acc_token": 0.7049, + "epoch": 0.03, + "loss_char": 0.7024, + "loss_token": 1.5629, + "lr": "2.00e-07", + "norm": 5.7902, + "step": 1446 + }, + { + "acc_char": 0.3071, + "acc_token": 0.6901, + "epoch": 0.03, + "loss_char": 0.699, + "loss_token": 1.6437, + "lr": "2.00e-07", + "norm": 5.9924, + "step": 1447 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6833, + "epoch": 0.03, + "loss_char": 0.7037, + "loss_token": 1.5695, + "lr": "2.00e-07", + "norm": 5.6272, + "step": 1448 + }, + { + "acc_char": 0.2949, + "acc_token": 0.6685, + "epoch": 0.03, + "loss_char": 0.739, + "loss_token": 1.7014, + "lr": "2.00e-07", + "norm": 5.7195, + "step": 1449 + }, + { + "acc_char": 0.2872, + "acc_token": 0.6562, + "epoch": 0.03, + "loss_char": 0.7832, + "loss_token": 1.7406, + "lr": "2.00e-07", + "norm": 5.8079, + "step": 1450 + }, + { + "acc_char": 0.318, + "acc_token": 0.7229, + "epoch": 0.03, + "loss_char": 0.6029, + "loss_token": 1.4753, + "lr": "2.00e-07", + "norm": 5.6095, + "step": 1451 + }, + { + "acc_char": 0.3115, + "acc_token": 0.6985, + "epoch": 0.03, + "loss_char": 0.6745, + "loss_token": 1.5394, + "lr": "2.00e-07", + "norm": 5.5028, + "step": 1452 + }, + { + "acc_char": 0.2884, + "acc_token": 0.6616, + "epoch": 0.03, + "loss_char": 0.7668, + "loss_token": 1.6874, + "lr": "2.00e-07", + "norm": 5.9784, + "step": 1453 + }, + { + "acc_char": 0.3044, + "acc_token": 0.6927, + "epoch": 0.03, + "loss_char": 0.6746, + "loss_token": 1.6322, + "lr": "2.00e-07", + "norm": 5.9423, + "step": 1454 + }, + { + "acc_char": 0.297, + "acc_token": 0.6784, + "epoch": 0.03, + "loss_char": 0.7432, + "loss_token": 1.5928, + "lr": "2.00e-07", + "norm": 5.9516, + "step": 1455 + }, + { + "acc_char": 0.3164, + "acc_token": 0.7029, + "epoch": 0.03, + "loss_char": 0.6968, + "loss_token": 1.5889, + "lr": "2.00e-07", + "norm": 5.5862, + "step": 1456 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6886, + "epoch": 0.03, + "loss_char": 0.7043, + "loss_token": 1.7186, + "lr": "2.00e-07", + "norm": 5.8658, + "step": 1457 + }, + { + "acc_char": 0.3089, + "acc_token": 0.6924, + "epoch": 0.03, + "loss_char": 0.697, + "loss_token": 1.5579, + "lr": "2.00e-07", + "norm": 5.526, + "step": 1458 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6933, + "epoch": 0.03, + "loss_char": 0.6822, + "loss_token": 1.5683, + "lr": "2.00e-07", + "norm": 5.565, + "step": 1459 + }, + { + "acc_char": 0.3016, + "acc_token": 0.6906, + "epoch": 0.03, + "loss_char": 0.6766, + "loss_token": 1.6086, + "lr": "2.00e-07", + "norm": 5.7796, + "step": 1460 + }, + { + "acc_char": 0.2909, + "acc_token": 0.6303, + "epoch": 0.03, + "loss_char": 0.8754, + "loss_token": 1.7548, + "lr": "2.00e-07", + "norm": 5.8089, + "step": 1461 + }, + { + "acc_char": 0.3114, + "acc_token": 0.6996, + "epoch": 0.03, + "loss_char": 0.6875, + "loss_token": 1.634, + "lr": "2.00e-07", + "norm": 5.7081, + "step": 1462 + }, + { + "acc_char": 0.3136, + "acc_token": 0.6977, + "epoch": 0.03, + "loss_char": 0.7016, + "loss_token": 1.6071, + "lr": "2.00e-07", + "norm": 5.6163, + "step": 1463 + }, + { + "acc_char": 0.3071, + "acc_token": 0.6808, + "epoch": 0.03, + "loss_char": 0.7488, + "loss_token": 1.6655, + "lr": "2.00e-07", + "norm": 5.9807, + "step": 1464 + }, + { + "acc_char": 0.292, + "acc_token": 0.6687, + "epoch": 0.03, + "loss_char": 0.7498, + "loss_token": 1.6668, + "lr": "2.00e-07", + "norm": 5.646, + "step": 1465 + }, + { + "acc_char": 0.3112, + "acc_token": 0.6972, + "epoch": 0.03, + "loss_char": 0.6942, + "loss_token": 1.6347, + "lr": "2.00e-07", + "norm": 5.8645, + "step": 1466 + }, + { + "acc_char": 0.3131, + "acc_token": 0.709, + "epoch": 0.03, + "loss_char": 0.6439, + "loss_token": 1.5205, + "lr": "2.00e-07", + "norm": 5.8664, + "step": 1467 + }, + { + "acc_char": 0.3017, + "acc_token": 0.6935, + "epoch": 0.03, + "loss_char": 0.6633, + "loss_token": 1.5259, + "lr": "2.00e-07", + "norm": 5.7257, + "step": 1468 + }, + { + "acc_char": 0.3088, + "acc_token": 0.7001, + "epoch": 0.03, + "loss_char": 0.679, + "loss_token": 1.5258, + "lr": "2.00e-07", + "norm": 5.5526, + "step": 1469 + }, + { + "acc_char": 0.3113, + "acc_token": 0.6895, + "epoch": 0.03, + "loss_char": 0.7152, + "loss_token": 1.5858, + "lr": "2.00e-07", + "norm": 5.8972, + "step": 1470 + }, + { + "acc_char": 0.3095, + "acc_token": 0.6881, + "epoch": 0.03, + "loss_char": 0.7051, + "loss_token": 1.6035, + "lr": "2.00e-07", + "norm": 5.7331, + "step": 1471 + }, + { + "acc_char": 0.2944, + "acc_token": 0.6664, + "epoch": 0.03, + "loss_char": 0.7404, + "loss_token": 1.5585, + "lr": "2.00e-07", + "norm": 5.8637, + "step": 1472 + }, + { + "acc_char": 0.2941, + "acc_token": 0.681, + "epoch": 0.03, + "loss_char": 0.721, + "loss_token": 1.61, + "lr": "2.00e-07", + "norm": 5.8771, + "step": 1473 + }, + { + "acc_char": 0.3011, + "acc_token": 0.6886, + "epoch": 0.03, + "loss_char": 0.7031, + "loss_token": 1.7011, + "lr": "2.00e-07", + "norm": 5.6953, + "step": 1474 + }, + { + "acc_char": 0.2966, + "acc_token": 0.684, + "epoch": 0.03, + "loss_char": 0.6892, + "loss_token": 1.5817, + "lr": "2.00e-07", + "norm": 5.8247, + "step": 1475 + }, + { + "acc_char": 0.2882, + "acc_token": 0.6594, + "epoch": 0.03, + "loss_char": 0.7986, + "loss_token": 1.7027, + "lr": "2.00e-07", + "norm": 5.6554, + "step": 1476 + }, + { + "acc_char": 0.2976, + "acc_token": 0.6816, + "epoch": 0.03, + "loss_char": 0.7055, + "loss_token": 1.5926, + "lr": "2.00e-07", + "norm": 5.6155, + "step": 1477 + }, + { + "acc_char": 0.3091, + "acc_token": 0.6927, + "epoch": 0.03, + "loss_char": 0.6936, + "loss_token": 1.6087, + "lr": "2.00e-07", + "norm": 5.8617, + "step": 1478 + }, + { + "acc_char": 0.3095, + "acc_token": 0.6873, + "epoch": 0.03, + "loss_char": 0.7059, + "loss_token": 1.583, + "lr": "2.00e-07", + "norm": 5.9507, + "step": 1479 + }, + { + "acc_char": 0.2857, + "acc_token": 0.611, + "epoch": 0.03, + "loss_char": 0.9413, + "loss_token": 1.7928, + "lr": "2.00e-07", + "norm": 5.3356, + "step": 1480 + }, + { + "acc_char": 0.315, + "acc_token": 0.706, + "epoch": 0.03, + "loss_char": 0.6653, + "loss_token": 1.557, + "lr": "2.00e-07", + "norm": 5.6644, + "step": 1481 + }, + { + "acc_char": 0.3029, + "acc_token": 0.6934, + "epoch": 0.03, + "loss_char": 0.7113, + "loss_token": 1.6224, + "lr": "2.00e-07", + "norm": 5.8906, + "step": 1482 + }, + { + "acc_char": 0.3124, + "acc_token": 0.7088, + "epoch": 0.03, + "loss_char": 0.6479, + "loss_token": 1.44, + "lr": "2.00e-07", + "norm": 5.4433, + "step": 1483 + }, + { + "acc_char": 0.2985, + "acc_token": 0.6853, + "epoch": 0.03, + "loss_char": 0.6856, + "loss_token": 1.5717, + "lr": "2.00e-07", + "norm": 5.6576, + "step": 1484 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6793, + "epoch": 0.03, + "loss_char": 0.7508, + "loss_token": 1.5706, + "lr": "2.00e-07", + "norm": 5.9455, + "step": 1485 + }, + { + "acc_char": 0.3116, + "acc_token": 0.7013, + "epoch": 0.03, + "loss_char": 0.6822, + "loss_token": 1.5799, + "lr": "2.00e-07", + "norm": 5.6926, + "step": 1486 + }, + { + "acc_char": 0.3101, + "acc_token": 0.6957, + "epoch": 0.03, + "loss_char": 0.6928, + "loss_token": 1.5266, + "lr": "2.00e-07", + "norm": 5.6095, + "step": 1487 + }, + { + "acc_char": 0.2985, + "acc_token": 0.679, + "epoch": 0.03, + "loss_char": 0.7325, + "loss_token": 1.7235, + "lr": "2.00e-07", + "norm": 6.2878, + "step": 1488 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6928, + "epoch": 0.03, + "loss_char": 0.6938, + "loss_token": 1.5439, + "lr": "2.00e-07", + "norm": 5.8769, + "step": 1489 + }, + { + "acc_char": 0.2956, + "acc_token": 0.6612, + "epoch": 0.03, + "loss_char": 0.8068, + "loss_token": 1.7251, + "lr": "2.00e-07", + "norm": 5.9183, + "step": 1490 + }, + { + "acc_char": 0.3088, + "acc_token": 0.6941, + "epoch": 0.03, + "loss_char": 0.691, + "loss_token": 1.5389, + "lr": "2.00e-07", + "norm": 5.8215, + "step": 1491 + }, + { + "acc_char": 0.3048, + "acc_token": 0.6917, + "epoch": 0.03, + "loss_char": 0.6979, + "loss_token": 1.6155, + "lr": "2.00e-07", + "norm": 5.684, + "step": 1492 + }, + { + "acc_char": 0.3073, + "acc_token": 0.6831, + "epoch": 0.03, + "loss_char": 0.7344, + "loss_token": 1.6261, + "lr": "2.00e-07", + "norm": 5.7121, + "step": 1493 + }, + { + "acc_char": 0.2876, + "acc_token": 0.6484, + "epoch": 0.03, + "loss_char": 0.8171, + "loss_token": 1.7074, + "lr": "2.00e-07", + "norm": 5.8721, + "step": 1494 + }, + { + "acc_char": 0.3122, + "acc_token": 0.7034, + "epoch": 0.03, + "loss_char": 0.6991, + "loss_token": 1.5556, + "lr": "2.00e-07", + "norm": 5.6013, + "step": 1495 + }, + { + "acc_char": 0.3105, + "acc_token": 0.6988, + "epoch": 0.03, + "loss_char": 0.6859, + "loss_token": 1.5507, + "lr": "2.00e-07", + "norm": 5.7857, + "step": 1496 + }, + { + "acc_char": 0.3001, + "acc_token": 0.6766, + "epoch": 0.03, + "loss_char": 0.747, + "loss_token": 1.6438, + "lr": "2.00e-07", + "norm": 5.6768, + "step": 1497 + }, + { + "acc_char": 0.3066, + "acc_token": 0.7027, + "epoch": 0.03, + "loss_char": 0.6664, + "loss_token": 1.6072, + "lr": "2.00e-07", + "norm": 5.7578, + "step": 1498 + }, + { + "acc_char": 0.2913, + "acc_token": 0.67, + "epoch": 0.03, + "loss_char": 0.7487, + "loss_token": 1.7215, + "lr": "2.00e-07", + "norm": 6.554, + "step": 1499 + }, + { + "acc_char": 0.292, + "acc_token": 0.6318, + "epoch": 0.03, + "loss_char": 0.8849, + "loss_token": 1.7365, + "lr": "2.00e-07", + "norm": 5.7328, + "step": 1500 + }, + { + "acc_char": 0.295, + "acc_token": 0.6673, + "epoch": 0.03, + "loss_char": 0.7515, + "loss_token": 1.6953, + "lr": "2.00e-07", + "norm": 5.9586, + "step": 1501 + }, + { + "acc_char": 0.3103, + "acc_token": 0.7079, + "epoch": 0.03, + "loss_char": 0.6526, + "loss_token": 1.4509, + "lr": "2.00e-07", + "norm": 5.6547, + "step": 1502 + }, + { + "acc_char": 0.3149, + "acc_token": 0.7111, + "epoch": 0.03, + "loss_char": 0.6496, + "loss_token": 1.5897, + "lr": "2.00e-07", + "norm": 5.7801, + "step": 1503 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6702, + "epoch": 0.03, + "loss_char": 0.7507, + "loss_token": 1.6831, + "lr": "2.00e-07", + "norm": 5.8254, + "step": 1504 + }, + { + "acc_char": 0.3132, + "acc_token": 0.7008, + "epoch": 0.03, + "loss_char": 0.682, + "loss_token": 1.5586, + "lr": "2.00e-07", + "norm": 5.5422, + "step": 1505 + }, + { + "acc_char": 0.2954, + "acc_token": 0.6786, + "epoch": 0.03, + "loss_char": 0.7245, + "loss_token": 1.6014, + "lr": "2.00e-07", + "norm": 5.6137, + "step": 1506 + }, + { + "acc_char": 0.3009, + "acc_token": 0.6757, + "epoch": 0.03, + "loss_char": 0.7528, + "loss_token": 1.6957, + "lr": "2.00e-07", + "norm": 6.0229, + "step": 1507 + }, + { + "acc_char": 0.3042, + "acc_token": 0.6788, + "epoch": 0.03, + "loss_char": 0.7415, + "loss_token": 1.6613, + "lr": "2.00e-07", + "norm": 5.6879, + "step": 1508 + }, + { + "acc_char": 0.3037, + "acc_token": 0.6762, + "epoch": 0.03, + "loss_char": 0.762, + "loss_token": 1.6386, + "lr": "2.00e-07", + "norm": 5.9348, + "step": 1509 + }, + { + "acc_char": 0.3078, + "acc_token": 0.69, + "epoch": 0.03, + "loss_char": 0.7001, + "loss_token": 1.5491, + "lr": "2.00e-07", + "norm": 6.2251, + "step": 1510 + }, + { + "acc_char": 0.3051, + "acc_token": 0.6926, + "epoch": 0.03, + "loss_char": 0.6873, + "loss_token": 1.4929, + "lr": "2.00e-07", + "norm": 5.5336, + "step": 1511 + }, + { + "acc_char": 0.3049, + "acc_token": 0.688, + "epoch": 0.03, + "loss_char": 0.7237, + "loss_token": 1.6694, + "lr": "2.00e-07", + "norm": 6.0466, + "step": 1512 + }, + { + "acc_char": 0.3071, + "acc_token": 0.6709, + "epoch": 0.03, + "loss_char": 0.7927, + "loss_token": 1.6294, + "lr": "2.00e-07", + "norm": 6.2413, + "step": 1513 + }, + { + "acc_char": 0.3108, + "acc_token": 0.6995, + "epoch": 0.03, + "loss_char": 0.6674, + "loss_token": 1.5762, + "lr": "2.00e-07", + "norm": 5.6092, + "step": 1514 + }, + { + "acc_char": 0.2982, + "acc_token": 0.6776, + "epoch": 0.03, + "loss_char": 0.7238, + "loss_token": 1.5251, + "lr": "2.00e-07", + "norm": 5.4862, + "step": 1515 + }, + { + "acc_char": 0.3253, + "acc_token": 0.7275, + "epoch": 0.03, + "loss_char": 0.6361, + "loss_token": 1.4564, + "lr": "2.00e-07", + "norm": 5.9106, + "step": 1516 + }, + { + "acc_char": 0.2983, + "acc_token": 0.6783, + "epoch": 0.03, + "loss_char": 0.725, + "loss_token": 1.5952, + "lr": "2.00e-07", + "norm": 5.4654, + "step": 1517 + }, + { + "acc_char": 0.3083, + "acc_token": 0.6961, + "epoch": 0.03, + "loss_char": 0.6991, + "loss_token": 1.637, + "lr": "2.00e-07", + "norm": 5.9309, + "step": 1518 + }, + { + "acc_char": 0.3237, + "acc_token": 0.736, + "epoch": 0.03, + "loss_char": 0.5821, + "loss_token": 1.4387, + "lr": "2.00e-07", + "norm": 9.0917, + "step": 1519 + }, + { + "acc_char": 0.3144, + "acc_token": 0.7185, + "epoch": 0.03, + "loss_char": 0.6176, + "loss_token": 1.4751, + "lr": "2.00e-07", + "norm": 5.3616, + "step": 1520 + }, + { + "acc_char": 0.302, + "acc_token": 0.6792, + "epoch": 0.03, + "loss_char": 0.7392, + "loss_token": 1.6167, + "lr": "2.00e-07", + "norm": 5.9091, + "step": 1521 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6809, + "epoch": 0.03, + "loss_char": 0.7184, + "loss_token": 1.6441, + "lr": "2.00e-07", + "norm": 6.1267, + "step": 1522 + }, + { + "acc_char": 0.3028, + "acc_token": 0.6993, + "epoch": 0.03, + "loss_char": 0.659, + "loss_token": 1.5538, + "lr": "2.00e-07", + "norm": 5.7089, + "step": 1523 + }, + { + "acc_char": 0.3049, + "acc_token": 0.6934, + "epoch": 0.03, + "loss_char": 0.7043, + "loss_token": 1.6431, + "lr": "2.00e-07", + "norm": 5.7937, + "step": 1524 + }, + { + "acc_char": 0.3015, + "acc_token": 0.6784, + "epoch": 0.03, + "loss_char": 0.7425, + "loss_token": 1.5777, + "lr": "2.00e-07", + "norm": 5.8933, + "step": 1525 + }, + { + "acc_char": 0.3037, + "acc_token": 0.6474, + "epoch": 0.03, + "loss_char": 0.8172, + "loss_token": 1.7262, + "lr": "2.00e-07", + "norm": 6.004, + "step": 1526 + }, + { + "acc_char": 0.3026, + "acc_token": 0.684, + "epoch": 0.03, + "loss_char": 0.7037, + "loss_token": 1.5507, + "lr": "2.00e-07", + "norm": 5.5811, + "step": 1527 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6867, + "epoch": 0.03, + "loss_char": 0.7148, + "loss_token": 1.6341, + "lr": "2.00e-07", + "norm": 5.7757, + "step": 1528 + }, + { + "acc_char": 0.3044, + "acc_token": 0.6911, + "epoch": 0.03, + "loss_char": 0.7137, + "loss_token": 1.5792, + "lr": "2.00e-07", + "norm": 5.6105, + "step": 1529 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6929, + "epoch": 0.03, + "loss_char": 0.6726, + "loss_token": 1.5453, + "lr": "2.00e-07", + "norm": 5.6457, + "step": 1530 + }, + { + "acc_char": 0.2814, + "acc_token": 0.6126, + "epoch": 0.03, + "loss_char": 0.8906, + "loss_token": 1.6564, + "lr": "2.00e-07", + "norm": 5.5373, + "step": 1531 + }, + { + "acc_char": 0.3132, + "acc_token": 0.7041, + "epoch": 0.03, + "loss_char": 0.6804, + "loss_token": 1.544, + "lr": "2.00e-07", + "norm": 5.454, + "step": 1532 + }, + { + "acc_char": 0.2948, + "acc_token": 0.6682, + "epoch": 0.03, + "loss_char": 0.7727, + "loss_token": 1.6557, + "lr": "2.00e-07", + "norm": 5.9132, + "step": 1533 + }, + { + "acc_char": 0.3199, + "acc_token": 0.7156, + "epoch": 0.03, + "loss_char": 0.6603, + "loss_token": 1.6086, + "lr": "2.00e-07", + "norm": 5.5924, + "step": 1534 + }, + { + "acc_char": 0.2951, + "acc_token": 0.6527, + "epoch": 0.03, + "loss_char": 0.8307, + "loss_token": 1.7452, + "lr": "2.00e-07", + "norm": 5.8703, + "step": 1535 + }, + { + "acc_char": 0.2731, + "acc_token": 0.6037, + "epoch": 0.03, + "loss_char": 0.9329, + "loss_token": 1.8911, + "lr": "2.00e-07", + "norm": 5.598, + "step": 1536 + }, + { + "acc_char": 0.308, + "acc_token": 0.6992, + "epoch": 0.03, + "loss_char": 0.6665, + "loss_token": 1.5613, + "lr": "2.00e-07", + "norm": 5.7335, + "step": 1537 + }, + { + "acc_char": 0.2874, + "acc_token": 0.6679, + "epoch": 0.03, + "loss_char": 0.7361, + "loss_token": 1.7051, + "lr": "2.00e-07", + "norm": 5.9876, + "step": 1538 + }, + { + "acc_char": 0.3093, + "acc_token": 0.6969, + "epoch": 0.03, + "loss_char": 0.6927, + "loss_token": 1.5823, + "lr": "2.00e-07", + "norm": 5.6095, + "step": 1539 + }, + { + "acc_char": 0.2997, + "acc_token": 0.6724, + "epoch": 0.03, + "loss_char": 0.7626, + "loss_token": 1.5533, + "lr": "2.00e-07", + "norm": 5.8144, + "step": 1540 + }, + { + "acc_char": 0.284, + "acc_token": 0.6538, + "epoch": 0.03, + "loss_char": 0.7933, + "loss_token": 1.7849, + "lr": "2.00e-07", + "norm": 5.8935, + "step": 1541 + }, + { + "acc_char": 0.3049, + "acc_token": 0.7073, + "epoch": 0.03, + "loss_char": 0.6401, + "loss_token": 1.554, + "lr": "2.00e-07", + "norm": 6.5277, + "step": 1542 + }, + { + "acc_char": 0.2852, + "acc_token": 0.631, + "epoch": 0.03, + "loss_char": 0.874, + "loss_token": 1.7801, + "lr": "2.00e-07", + "norm": 5.8398, + "step": 1543 + }, + { + "acc_char": 0.2913, + "acc_token": 0.6557, + "epoch": 0.03, + "loss_char": 0.8232, + "loss_token": 1.6764, + "lr": "2.00e-07", + "norm": 6.0703, + "step": 1544 + }, + { + "acc_char": 0.2922, + "acc_token": 0.6589, + "epoch": 0.03, + "loss_char": 0.7863, + "loss_token": 1.7354, + "lr": "2.00e-07", + "norm": 5.9921, + "step": 1545 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6869, + "epoch": 0.03, + "loss_char": 0.71, + "loss_token": 1.5729, + "lr": "2.00e-07", + "norm": 5.8846, + "step": 1546 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6931, + "epoch": 0.03, + "loss_char": 0.6835, + "loss_token": 1.4914, + "lr": "2.00e-07", + "norm": 5.4592, + "step": 1547 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6818, + "epoch": 0.03, + "loss_char": 0.7137, + "loss_token": 1.5716, + "lr": "2.00e-07", + "norm": 5.5647, + "step": 1548 + }, + { + "acc_char": 0.3113, + "acc_token": 0.7023, + "epoch": 0.03, + "loss_char": 0.6746, + "loss_token": 1.6481, + "lr": "2.00e-07", + "norm": 6.1887, + "step": 1549 + }, + { + "acc_char": 0.3199, + "acc_token": 0.7231, + "epoch": 0.03, + "loss_char": 0.6157, + "loss_token": 1.4955, + "lr": "2.00e-07", + "norm": 5.2796, + "step": 1550 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6919, + "epoch": 0.03, + "loss_char": 0.6797, + "loss_token": 1.5701, + "lr": "2.00e-07", + "norm": 5.3542, + "step": 1551 + }, + { + "acc_char": 0.2986, + "acc_token": 0.6709, + "epoch": 0.03, + "loss_char": 0.7678, + "loss_token": 1.6893, + "lr": "2.00e-07", + "norm": 6.0462, + "step": 1552 + }, + { + "acc_char": 0.3057, + "acc_token": 0.6952, + "epoch": 0.03, + "loss_char": 0.6986, + "loss_token": 1.6515, + "lr": "2.00e-07", + "norm": 6.3134, + "step": 1553 + }, + { + "acc_char": 0.3197, + "acc_token": 0.7216, + "epoch": 0.03, + "loss_char": 0.6201, + "loss_token": 1.4663, + "lr": "2.00e-07", + "norm": 5.6139, + "step": 1554 + }, + { + "acc_char": 0.2872, + "acc_token": 0.6617, + "epoch": 0.03, + "loss_char": 0.7866, + "loss_token": 1.7042, + "lr": "2.00e-07", + "norm": 5.8264, + "step": 1555 + }, + { + "acc_char": 0.2946, + "acc_token": 0.6686, + "epoch": 0.03, + "loss_char": 0.7846, + "loss_token": 1.677, + "lr": "2.00e-07", + "norm": 5.8586, + "step": 1556 + }, + { + "acc_char": 0.2852, + "acc_token": 0.6372, + "epoch": 0.03, + "loss_char": 0.8567, + "loss_token": 1.756, + "lr": "2.00e-07", + "norm": 5.9905, + "step": 1557 + }, + { + "acc_char": 0.2986, + "acc_token": 0.6659, + "epoch": 0.03, + "loss_char": 0.785, + "loss_token": 1.5579, + "lr": "2.00e-07", + "norm": 5.7136, + "step": 1558 + }, + { + "acc_char": 0.3083, + "acc_token": 0.7062, + "epoch": 0.03, + "loss_char": 0.643, + "loss_token": 1.4669, + "lr": "2.00e-07", + "norm": 5.4612, + "step": 1559 + }, + { + "acc_char": 0.2992, + "acc_token": 0.6764, + "epoch": 0.03, + "loss_char": 0.7477, + "loss_token": 1.7046, + "lr": "2.00e-07", + "norm": 6.3699, + "step": 1560 + }, + { + "acc_char": 0.2895, + "acc_token": 0.6508, + "epoch": 0.03, + "loss_char": 0.8214, + "loss_token": 1.7717, + "lr": "2.00e-07", + "norm": 6.0916, + "step": 1561 + }, + { + "acc_char": 0.2966, + "acc_token": 0.674, + "epoch": 0.03, + "loss_char": 0.7445, + "loss_token": 1.6343, + "lr": "2.00e-07", + "norm": 6.1589, + "step": 1562 + }, + { + "acc_char": 0.2937, + "acc_token": 0.669, + "epoch": 0.03, + "loss_char": 0.7472, + "loss_token": 1.6864, + "lr": "2.00e-07", + "norm": 5.7867, + "step": 1563 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6748, + "epoch": 0.03, + "loss_char": 0.742, + "loss_token": 1.6648, + "lr": "2.00e-07", + "norm": 5.9749, + "step": 1564 + }, + { + "acc_char": 0.2934, + "acc_token": 0.6705, + "epoch": 0.03, + "loss_char": 0.7405, + "loss_token": 1.6838, + "lr": "2.00e-07", + "norm": 5.7158, + "step": 1565 + }, + { + "acc_char": 0.2926, + "acc_token": 0.6674, + "epoch": 0.03, + "loss_char": 0.7529, + "loss_token": 1.746, + "lr": "2.00e-07", + "norm": 5.8629, + "step": 1566 + }, + { + "acc_char": 0.2944, + "acc_token": 0.6342, + "epoch": 0.03, + "loss_char": 0.8539, + "loss_token": 1.7368, + "lr": "2.00e-07", + "norm": 5.8781, + "step": 1567 + }, + { + "acc_char": 0.2776, + "acc_token": 0.6914, + "epoch": 0.03, + "loss_char": 0.6223, + "loss_token": 1.6307, + "lr": "2.00e-07", + "norm": 5.8752, + "step": 1568 + }, + { + "acc_char": 0.3047, + "acc_token": 0.6875, + "epoch": 0.03, + "loss_char": 0.7133, + "loss_token": 1.6326, + "lr": "2.00e-07", + "norm": 5.7235, + "step": 1569 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6858, + "epoch": 0.03, + "loss_char": 0.7244, + "loss_token": 1.6255, + "lr": "2.00e-07", + "norm": 5.8664, + "step": 1570 + }, + { + "acc_char": 0.2994, + "acc_token": 0.6859, + "epoch": 0.03, + "loss_char": 0.686, + "loss_token": 1.6009, + "lr": "2.00e-07", + "norm": 5.8634, + "step": 1571 + }, + { + "acc_char": 0.3086, + "acc_token": 0.6875, + "epoch": 0.03, + "loss_char": 0.7275, + "loss_token": 1.577, + "lr": "2.00e-07", + "norm": 5.6534, + "step": 1572 + }, + { + "acc_char": 0.3029, + "acc_token": 0.6868, + "epoch": 0.03, + "loss_char": 0.6953, + "loss_token": 1.5916, + "lr": "2.00e-07", + "norm": 5.8533, + "step": 1573 + }, + { + "acc_char": 0.2905, + "acc_token": 0.6677, + "epoch": 0.03, + "loss_char": 0.7498, + "loss_token": 1.6433, + "lr": "2.00e-07", + "norm": 5.6664, + "step": 1574 + }, + { + "acc_char": 0.3043, + "acc_token": 0.6911, + "epoch": 0.03, + "loss_char": 0.689, + "loss_token": 1.6164, + "lr": "2.00e-07", + "norm": 5.9362, + "step": 1575 + }, + { + "acc_char": 0.3025, + "acc_token": 0.6621, + "epoch": 0.03, + "loss_char": 0.7666, + "loss_token": 1.728, + "lr": "2.00e-07", + "norm": 5.61, + "step": 1576 + }, + { + "acc_char": 0.2988, + "acc_token": 0.6836, + "epoch": 0.03, + "loss_char": 0.6929, + "loss_token": 1.5914, + "lr": "2.00e-07", + "norm": 5.441, + "step": 1577 + }, + { + "acc_char": 0.3046, + "acc_token": 0.6872, + "epoch": 0.03, + "loss_char": 0.7199, + "loss_token": 1.5617, + "lr": "2.00e-07", + "norm": 5.874, + "step": 1578 + }, + { + "acc_char": 0.3127, + "acc_token": 0.7107, + "epoch": 0.03, + "loss_char": 0.6479, + "loss_token": 1.5476, + "lr": "2.00e-07", + "norm": 5.5485, + "step": 1579 + }, + { + "acc_char": 0.3122, + "acc_token": 0.7101, + "epoch": 0.03, + "loss_char": 0.6448, + "loss_token": 1.5271, + "lr": "2.00e-07", + "norm": 5.7229, + "step": 1580 + }, + { + "acc_char": 0.3166, + "acc_token": 0.7051, + "epoch": 0.03, + "loss_char": 0.6688, + "loss_token": 1.5672, + "lr": "2.00e-07", + "norm": 6.229, + "step": 1581 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6724, + "epoch": 0.03, + "loss_char": 0.7677, + "loss_token": 1.7375, + "lr": "2.00e-07", + "norm": 5.9405, + "step": 1582 + }, + { + "acc_char": 0.305, + "acc_token": 0.6874, + "epoch": 0.03, + "loss_char": 0.7192, + "loss_token": 1.6619, + "lr": "2.00e-07", + "norm": 5.5951, + "step": 1583 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6898, + "epoch": 0.03, + "loss_char": 0.6772, + "loss_token": 1.5391, + "lr": "2.00e-07", + "norm": 5.6114, + "step": 1584 + }, + { + "acc_char": 0.2955, + "acc_token": 0.6787, + "epoch": 0.03, + "loss_char": 0.7257, + "loss_token": 1.6871, + "lr": "2.00e-07", + "norm": 5.8745, + "step": 1585 + }, + { + "acc_char": 0.3106, + "acc_token": 0.6899, + "epoch": 0.03, + "loss_char": 0.7315, + "loss_token": 1.641, + "lr": "2.00e-07", + "norm": 5.6256, + "step": 1586 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6919, + "epoch": 0.03, + "loss_char": 0.7055, + "loss_token": 1.6004, + "lr": "2.00e-07", + "norm": 5.8087, + "step": 1587 + }, + { + "acc_char": 0.2998, + "acc_token": 0.6839, + "epoch": 0.03, + "loss_char": 0.6994, + "loss_token": 1.6181, + "lr": "2.00e-07", + "norm": 6.0405, + "step": 1588 + }, + { + "acc_char": 0.2947, + "acc_token": 0.6892, + "epoch": 0.03, + "loss_char": 0.6667, + "loss_token": 1.5679, + "lr": "2.00e-07", + "norm": 5.5971, + "step": 1589 + }, + { + "acc_char": 0.3153, + "acc_token": 0.7029, + "epoch": 0.03, + "loss_char": 0.6714, + "loss_token": 1.5788, + "lr": "2.00e-07", + "norm": 5.8712, + "step": 1590 + }, + { + "acc_char": 0.2986, + "acc_token": 0.6734, + "epoch": 0.03, + "loss_char": 0.746, + "loss_token": 1.5374, + "lr": "2.00e-07", + "norm": 5.7618, + "step": 1591 + }, + { + "acc_char": 0.2929, + "acc_token": 0.6699, + "epoch": 0.03, + "loss_char": 0.7309, + "loss_token": 1.6542, + "lr": "2.00e-07", + "norm": 5.9127, + "step": 1592 + }, + { + "acc_char": 0.3077, + "acc_token": 0.7082, + "epoch": 0.03, + "loss_char": 0.6441, + "loss_token": 1.565, + "lr": "2.00e-07", + "norm": 5.8163, + "step": 1593 + }, + { + "acc_char": 0.3114, + "acc_token": 0.7076, + "epoch": 0.03, + "loss_char": 0.6438, + "loss_token": 1.5501, + "lr": "2.00e-07", + "norm": 5.9952, + "step": 1594 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6792, + "epoch": 0.03, + "loss_char": 0.7387, + "loss_token": 1.6415, + "lr": "2.00e-07", + "norm": 6.0267, + "step": 1595 + }, + { + "acc_char": 0.3012, + "acc_token": 0.6793, + "epoch": 0.03, + "loss_char": 0.7423, + "loss_token": 1.6092, + "lr": "2.00e-07", + "norm": 5.8914, + "step": 1596 + }, + { + "acc_char": 0.2812, + "acc_token": 0.6123, + "epoch": 0.03, + "loss_char": 0.9265, + "loss_token": 1.8237, + "lr": "2.00e-07", + "norm": 6.0108, + "step": 1597 + }, + { + "acc_char": 0.3074, + "acc_token": 0.6941, + "epoch": 0.03, + "loss_char": 0.7013, + "loss_token": 1.5543, + "lr": "2.00e-07", + "norm": 5.7529, + "step": 1598 + }, + { + "acc_char": 0.3193, + "acc_token": 0.7049, + "epoch": 0.03, + "loss_char": 0.6746, + "loss_token": 1.5883, + "lr": "2.00e-07", + "norm": 5.7992, + "step": 1599 + }, + { + "acc_char": 0.2794, + "acc_token": 0.6196, + "epoch": 0.03, + "loss_char": 0.8698, + "loss_token": 1.8613, + "lr": "2.00e-07", + "norm": 5.6176, + "step": 1600 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6837, + "epoch": 0.03, + "loss_char": 0.716, + "loss_token": 1.6654, + "lr": "2.00e-07", + "norm": 5.8047, + "step": 1601 + }, + { + "acc_char": 0.2936, + "acc_token": 0.6689, + "epoch": 0.03, + "loss_char": 0.7596, + "loss_token": 1.5968, + "lr": "2.00e-07", + "norm": 5.7253, + "step": 1602 + }, + { + "acc_char": 0.307, + "acc_token": 0.6971, + "epoch": 0.03, + "loss_char": 0.7045, + "loss_token": 1.5782, + "lr": "2.00e-07", + "norm": 5.7435, + "step": 1603 + }, + { + "acc_char": 0.3112, + "acc_token": 0.6984, + "epoch": 0.03, + "loss_char": 0.6758, + "loss_token": 1.5436, + "lr": "2.00e-07", + "norm": 5.5545, + "step": 1604 + }, + { + "acc_char": 0.3087, + "acc_token": 0.6902, + "epoch": 0.03, + "loss_char": 0.7368, + "loss_token": 1.6031, + "lr": "2.00e-07", + "norm": 5.8911, + "step": 1605 + }, + { + "acc_char": 0.3206, + "acc_token": 0.7121, + "epoch": 0.03, + "loss_char": 0.6525, + "loss_token": 1.4743, + "lr": "2.00e-07", + "norm": 5.6098, + "step": 1606 + }, + { + "acc_char": 0.2956, + "acc_token": 0.6733, + "epoch": 0.03, + "loss_char": 0.7465, + "loss_token": 1.6542, + "lr": "2.00e-07", + "norm": 5.6449, + "step": 1607 + }, + { + "acc_char": 0.3006, + "acc_token": 0.6817, + "epoch": 0.03, + "loss_char": 0.7201, + "loss_token": 1.6398, + "lr": "2.00e-07", + "norm": 6.1526, + "step": 1608 + }, + { + "acc_char": 0.32, + "acc_token": 0.7127, + "epoch": 0.03, + "loss_char": 0.6491, + "loss_token": 1.4844, + "lr": "2.00e-07", + "norm": 5.6564, + "step": 1609 + }, + { + "acc_char": 0.2977, + "acc_token": 0.6769, + "epoch": 0.03, + "loss_char": 0.7228, + "loss_token": 1.6341, + "lr": "2.00e-07", + "norm": 6.3097, + "step": 1610 + }, + { + "acc_char": 0.3051, + "acc_token": 0.685, + "epoch": 0.03, + "loss_char": 0.7089, + "loss_token": 1.6848, + "lr": "2.00e-07", + "norm": 5.7969, + "step": 1611 + }, + { + "acc_char": 0.3016, + "acc_token": 0.6773, + "epoch": 0.03, + "loss_char": 0.7574, + "loss_token": 1.6114, + "lr": "2.00e-07", + "norm": 5.7741, + "step": 1612 + }, + { + "acc_char": 0.3078, + "acc_token": 0.6986, + "epoch": 0.03, + "loss_char": 0.6857, + "loss_token": 1.5755, + "lr": "2.00e-07", + "norm": 5.4162, + "step": 1613 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6676, + "epoch": 0.03, + "loss_char": 0.781, + "loss_token": 1.6191, + "lr": "2.00e-07", + "norm": 6.1566, + "step": 1614 + }, + { + "acc_char": 0.3072, + "acc_token": 0.7054, + "epoch": 0.03, + "loss_char": 0.6538, + "loss_token": 1.5423, + "lr": "2.00e-07", + "norm": 6.3931, + "step": 1615 + }, + { + "acc_char": 0.3032, + "acc_token": 0.6881, + "epoch": 0.03, + "loss_char": 0.7145, + "loss_token": 1.5765, + "lr": "2.00e-07", + "norm": 5.7099, + "step": 1616 + }, + { + "acc_char": 0.298, + "acc_token": 0.6861, + "epoch": 0.03, + "loss_char": 0.7009, + "loss_token": 1.6293, + "lr": "2.00e-07", + "norm": 5.4639, + "step": 1617 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6804, + "epoch": 0.03, + "loss_char": 0.7174, + "loss_token": 1.6531, + "lr": "2.00e-07", + "norm": 5.9676, + "step": 1618 + }, + { + "acc_char": 0.3102, + "acc_token": 0.7056, + "epoch": 0.03, + "loss_char": 0.647, + "loss_token": 1.4693, + "lr": "2.00e-07", + "norm": 5.5077, + "step": 1619 + }, + { + "acc_char": 0.3146, + "acc_token": 0.7076, + "epoch": 0.03, + "loss_char": 0.6477, + "loss_token": 1.5071, + "lr": "2.00e-07", + "norm": 5.8307, + "step": 1620 + }, + { + "acc_char": 0.323, + "acc_token": 0.7129, + "epoch": 0.03, + "loss_char": 0.6701, + "loss_token": 1.462, + "lr": "2.00e-07", + "norm": 5.6707, + "step": 1621 + }, + { + "acc_char": 0.3075, + "acc_token": 0.6937, + "epoch": 0.03, + "loss_char": 0.6996, + "loss_token": 1.6832, + "lr": "2.00e-07", + "norm": 5.8797, + "step": 1622 + }, + { + "acc_char": 0.3074, + "acc_token": 0.6768, + "epoch": 0.03, + "loss_char": 0.7479, + "loss_token": 1.6044, + "lr": "2.00e-07", + "norm": 6.0979, + "step": 1623 + }, + { + "acc_char": 0.3096, + "acc_token": 0.6896, + "epoch": 0.03, + "loss_char": 0.7126, + "loss_token": 1.5891, + "lr": "2.00e-07", + "norm": 5.9975, + "step": 1624 + }, + { + "acc_char": 0.3054, + "acc_token": 0.7098, + "epoch": 0.03, + "loss_char": 0.6366, + "loss_token": 1.4998, + "lr": "2.00e-07", + "norm": 5.5291, + "step": 1625 + }, + { + "acc_char": 0.2923, + "acc_token": 0.6795, + "epoch": 0.03, + "loss_char": 0.6891, + "loss_token": 1.6086, + "lr": "2.00e-07", + "norm": 6.0158, + "step": 1626 + }, + { + "acc_char": 0.3, + "acc_token": 0.6793, + "epoch": 0.03, + "loss_char": 0.7358, + "loss_token": 1.6231, + "lr": "2.00e-07", + "norm": 5.588, + "step": 1627 + }, + { + "acc_char": 0.2866, + "acc_token": 0.6444, + "epoch": 0.03, + "loss_char": 0.8436, + "loss_token": 1.7327, + "lr": "2.00e-07", + "norm": 5.6852, + "step": 1628 + }, + { + "acc_char": 0.3153, + "acc_token": 0.718, + "epoch": 0.03, + "loss_char": 0.6243, + "loss_token": 1.4671, + "lr": "2.00e-07", + "norm": 5.329, + "step": 1629 + }, + { + "acc_char": 0.298, + "acc_token": 0.6713, + "epoch": 0.03, + "loss_char": 0.7667, + "loss_token": 1.6396, + "lr": "2.00e-07", + "norm": 5.5255, + "step": 1630 + }, + { + "acc_char": 0.3222, + "acc_token": 0.7327, + "epoch": 0.03, + "loss_char": 0.6019, + "loss_token": 1.478, + "lr": "2.00e-07", + "norm": 5.5881, + "step": 1631 + }, + { + "acc_char": 0.2903, + "acc_token": 0.6636, + "epoch": 0.03, + "loss_char": 0.7665, + "loss_token": 1.7035, + "lr": "2.00e-07", + "norm": 5.9326, + "step": 1632 + }, + { + "acc_char": 0.3145, + "acc_token": 0.7065, + "epoch": 0.03, + "loss_char": 0.6586, + "loss_token": 1.4825, + "lr": "2.00e-07", + "norm": 5.404, + "step": 1633 + }, + { + "acc_char": 0.2927, + "acc_token": 0.6748, + "epoch": 0.03, + "loss_char": 0.7274, + "loss_token": 1.6455, + "lr": "2.00e-07", + "norm": 5.5322, + "step": 1634 + }, + { + "acc_char": 0.2951, + "acc_token": 0.6765, + "epoch": 0.03, + "loss_char": 0.7247, + "loss_token": 1.6789, + "lr": "2.00e-07", + "norm": 5.9366, + "step": 1635 + }, + { + "acc_char": 0.3179, + "acc_token": 0.7132, + "epoch": 0.03, + "loss_char": 0.651, + "loss_token": 1.5661, + "lr": "2.00e-07", + "norm": 5.6878, + "step": 1636 + }, + { + "acc_char": 0.2999, + "acc_token": 0.6914, + "epoch": 0.03, + "loss_char": 0.6779, + "loss_token": 1.5841, + "lr": "2.00e-07", + "norm": 5.5705, + "step": 1637 + }, + { + "acc_char": 0.2868, + "acc_token": 0.6586, + "epoch": 0.03, + "loss_char": 0.7848, + "loss_token": 1.7319, + "lr": "2.00e-07", + "norm": 5.9417, + "step": 1638 + }, + { + "acc_char": 0.3076, + "acc_token": 0.7015, + "epoch": 0.03, + "loss_char": 0.664, + "loss_token": 1.5467, + "lr": "2.00e-07", + "norm": 5.6836, + "step": 1639 + }, + { + "acc_char": 0.2993, + "acc_token": 0.6912, + "epoch": 0.03, + "loss_char": 0.6707, + "loss_token": 1.5604, + "lr": "2.00e-07", + "norm": 5.6171, + "step": 1640 + }, + { + "acc_char": 0.3036, + "acc_token": 0.6803, + "epoch": 0.03, + "loss_char": 0.7486, + "loss_token": 1.6083, + "lr": "2.00e-07", + "norm": 5.6892, + "step": 1641 + }, + { + "acc_char": 0.2826, + "acc_token": 0.6196, + "epoch": 0.03, + "loss_char": 0.864, + "loss_token": 1.7846, + "lr": "2.00e-07", + "norm": 5.726, + "step": 1642 + }, + { + "acc_char": 0.2922, + "acc_token": 0.6715, + "epoch": 0.03, + "loss_char": 0.7413, + "loss_token": 1.622, + "lr": "2.00e-07", + "norm": 5.8253, + "step": 1643 + }, + { + "acc_char": 0.3087, + "acc_token": 0.7041, + "epoch": 0.03, + "loss_char": 0.6732, + "loss_token": 1.6123, + "lr": "2.00e-07", + "norm": 5.7101, + "step": 1644 + }, + { + "acc_char": 0.2978, + "acc_token": 0.6763, + "epoch": 0.03, + "loss_char": 0.7507, + "loss_token": 1.6697, + "lr": "2.00e-07", + "norm": 6.2216, + "step": 1645 + }, + { + "acc_char": 0.3017, + "acc_token": 0.6764, + "epoch": 0.03, + "loss_char": 0.7482, + "loss_token": 1.6147, + "lr": "2.00e-07", + "norm": 5.9259, + "step": 1646 + }, + { + "acc_char": 0.305, + "acc_token": 0.6866, + "epoch": 0.03, + "loss_char": 0.7198, + "loss_token": 1.612, + "lr": "2.00e-07", + "norm": 5.7423, + "step": 1647 + }, + { + "acc_char": 0.3004, + "acc_token": 0.6849, + "epoch": 0.03, + "loss_char": 0.6971, + "loss_token": 1.479, + "lr": "2.00e-07", + "norm": 5.5494, + "step": 1648 + }, + { + "acc_char": 0.3019, + "acc_token": 0.691, + "epoch": 0.03, + "loss_char": 0.6878, + "loss_token": 1.5946, + "lr": "2.00e-07", + "norm": 5.4234, + "step": 1649 + }, + { + "acc_char": 0.3052, + "acc_token": 0.682, + "epoch": 0.03, + "loss_char": 0.7504, + "loss_token": 1.6363, + "lr": "2.00e-07", + "norm": 5.7636, + "step": 1650 + }, + { + "acc_char": 0.317, + "acc_token": 0.6991, + "epoch": 0.03, + "loss_char": 0.7028, + "loss_token": 1.553, + "lr": "2.00e-07", + "norm": 5.845, + "step": 1651 + }, + { + "acc_char": 0.2978, + "acc_token": 0.6823, + "epoch": 0.03, + "loss_char": 0.7021, + "loss_token": 1.6016, + "lr": "2.00e-07", + "norm": 5.9888, + "step": 1652 + }, + { + "acc_char": 0.3052, + "acc_token": 0.6953, + "epoch": 0.03, + "loss_char": 0.6907, + "loss_token": 1.5925, + "lr": "2.00e-07", + "norm": 5.5029, + "step": 1653 + }, + { + "acc_char": 0.2973, + "acc_token": 0.6836, + "epoch": 0.03, + "loss_char": 0.6947, + "loss_token": 1.573, + "lr": "2.00e-07", + "norm": 5.7139, + "step": 1654 + }, + { + "acc_char": 0.2818, + "acc_token": 0.6206, + "epoch": 0.03, + "loss_char": 0.8751, + "loss_token": 1.7996, + "lr": "2.00e-07", + "norm": 5.4943, + "step": 1655 + }, + { + "acc_char": 0.319, + "acc_token": 0.7293, + "epoch": 0.03, + "loss_char": 0.6127, + "loss_token": 1.5183, + "lr": "2.00e-07", + "norm": 5.6807, + "step": 1656 + }, + { + "acc_char": 0.305, + "acc_token": 0.6877, + "epoch": 0.03, + "loss_char": 0.7121, + "loss_token": 1.6186, + "lr": "2.00e-07", + "norm": 5.7593, + "step": 1657 + }, + { + "acc_char": 0.2833, + "acc_token": 0.6182, + "epoch": 0.03, + "loss_char": 0.8656, + "loss_token": 1.7297, + "lr": "2.00e-07", + "norm": 5.6634, + "step": 1658 + }, + { + "acc_char": 0.3079, + "acc_token": 0.6799, + "epoch": 0.03, + "loss_char": 0.7352, + "loss_token": 1.6045, + "lr": "2.00e-07", + "norm": 5.8658, + "step": 1659 + }, + { + "acc_char": 0.3183, + "acc_token": 0.7286, + "epoch": 0.03, + "loss_char": 0.5886, + "loss_token": 1.4656, + "lr": "2.00e-07", + "norm": 5.3731, + "step": 1660 + }, + { + "acc_char": 0.3095, + "acc_token": 0.7071, + "epoch": 0.03, + "loss_char": 0.6553, + "loss_token": 1.6271, + "lr": "2.00e-07", + "norm": 5.7404, + "step": 1661 + }, + { + "acc_char": 0.3166, + "acc_token": 0.7046, + "epoch": 0.03, + "loss_char": 0.6912, + "loss_token": 1.5677, + "lr": "2.00e-07", + "norm": 5.5271, + "step": 1662 + }, + { + "acc_char": 0.2971, + "acc_token": 0.671, + "epoch": 0.03, + "loss_char": 0.7658, + "loss_token": 1.6013, + "lr": "2.00e-07", + "norm": 5.7884, + "step": 1663 + }, + { + "acc_char": 0.3086, + "acc_token": 0.6928, + "epoch": 0.03, + "loss_char": 0.713, + "loss_token": 1.6149, + "lr": "2.00e-07", + "norm": 5.6196, + "step": 1664 + }, + { + "acc_char": 0.308, + "acc_token": 0.6924, + "epoch": 0.03, + "loss_char": 0.6979, + "loss_token": 1.5536, + "lr": "2.00e-07", + "norm": 5.8624, + "step": 1665 + }, + { + "acc_char": 0.3089, + "acc_token": 0.7017, + "epoch": 0.03, + "loss_char": 0.6717, + "loss_token": 1.6092, + "lr": "2.00e-07", + "norm": 5.6943, + "step": 1666 + }, + { + "acc_char": 0.2943, + "acc_token": 0.6652, + "epoch": 0.03, + "loss_char": 0.7602, + "loss_token": 1.7535, + "lr": "2.00e-07", + "norm": 6.2278, + "step": 1667 + }, + { + "acc_char": 0.3104, + "acc_token": 0.7119, + "epoch": 0.03, + "loss_char": 0.636, + "loss_token": 1.421, + "lr": "2.00e-07", + "norm": 5.2749, + "step": 1668 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6839, + "epoch": 0.03, + "loss_char": 0.706, + "loss_token": 1.5401, + "lr": "2.00e-07", + "norm": 5.5522, + "step": 1669 + }, + { + "acc_char": 0.3068, + "acc_token": 0.6969, + "epoch": 0.03, + "loss_char": 0.6767, + "loss_token": 1.5882, + "lr": "2.00e-07", + "norm": 5.5253, + "step": 1670 + }, + { + "acc_char": 0.2855, + "acc_token": 0.6567, + "epoch": 0.03, + "loss_char": 0.7808, + "loss_token": 1.6454, + "lr": "2.00e-07", + "norm": 5.9135, + "step": 1671 + }, + { + "acc_char": 0.3075, + "acc_token": 0.6892, + "epoch": 0.03, + "loss_char": 0.7201, + "loss_token": 1.6653, + "lr": "2.00e-07", + "norm": 5.6643, + "step": 1672 + }, + { + "acc_char": 0.2983, + "acc_token": 0.686, + "epoch": 0.03, + "loss_char": 0.7036, + "loss_token": 1.4937, + "lr": "2.00e-07", + "norm": 5.5387, + "step": 1673 + }, + { + "acc_char": 0.3125, + "acc_token": 0.7078, + "epoch": 0.03, + "loss_char": 0.6312, + "loss_token": 1.4955, + "lr": "2.00e-07", + "norm": 5.6384, + "step": 1674 + }, + { + "acc_char": 0.2953, + "acc_token": 0.6475, + "epoch": 0.03, + "loss_char": 0.8527, + "loss_token": 1.7366, + "lr": "2.00e-07", + "norm": 5.8287, + "step": 1675 + }, + { + "acc_char": 0.3063, + "acc_token": 0.7003, + "epoch": 0.03, + "loss_char": 0.6829, + "loss_token": 1.5926, + "lr": "2.00e-07", + "norm": 6.3705, + "step": 1676 + }, + { + "acc_char": 0.3088, + "acc_token": 0.7072, + "epoch": 0.03, + "loss_char": 0.6492, + "loss_token": 1.5889, + "lr": "2.00e-07", + "norm": 5.6321, + "step": 1677 + }, + { + "acc_char": 0.3039, + "acc_token": 0.6827, + "epoch": 0.03, + "loss_char": 0.7191, + "loss_token": 1.639, + "lr": "2.00e-07", + "norm": 5.8795, + "step": 1678 + }, + { + "acc_char": 0.29, + "acc_token": 0.6521, + "epoch": 0.03, + "loss_char": 0.8078, + "loss_token": 1.6645, + "lr": "2.00e-07", + "norm": 5.6345, + "step": 1679 + }, + { + "acc_char": 0.291, + "acc_token": 0.6557, + "epoch": 0.03, + "loss_char": 0.8089, + "loss_token": 1.7202, + "lr": "2.00e-07", + "norm": 6.1896, + "step": 1680 + }, + { + "acc_char": 0.2941, + "acc_token": 0.6743, + "epoch": 0.03, + "loss_char": 0.7159, + "loss_token": 1.5983, + "lr": "2.00e-07", + "norm": 5.5012, + "step": 1681 + }, + { + "acc_char": 0.3052, + "acc_token": 0.6911, + "epoch": 0.03, + "loss_char": 0.7331, + "loss_token": 1.6407, + "lr": "2.00e-07", + "norm": 5.7705, + "step": 1682 + }, + { + "acc_char": 0.2974, + "acc_token": 0.6622, + "epoch": 0.03, + "loss_char": 0.8001, + "loss_token": 1.7329, + "lr": "2.00e-07", + "norm": 6.0946, + "step": 1683 + }, + { + "acc_char": 0.3164, + "acc_token": 0.7132, + "epoch": 0.03, + "loss_char": 0.6316, + "loss_token": 1.4631, + "lr": "2.00e-07", + "norm": 5.8817, + "step": 1684 + }, + { + "acc_char": 0.3026, + "acc_token": 0.6784, + "epoch": 0.03, + "loss_char": 0.747, + "loss_token": 1.6259, + "lr": "2.00e-07", + "norm": 5.7106, + "step": 1685 + }, + { + "acc_char": 0.3283, + "acc_token": 0.735, + "epoch": 0.03, + "loss_char": 0.598, + "loss_token": 1.4537, + "lr": "2.00e-07", + "norm": 5.9056, + "step": 1686 + }, + { + "acc_char": 0.3011, + "acc_token": 0.6876, + "epoch": 0.03, + "loss_char": 0.6903, + "loss_token": 1.608, + "lr": "2.00e-07", + "norm": 5.6, + "step": 1687 + }, + { + "acc_char": 0.3081, + "acc_token": 0.6884, + "epoch": 0.03, + "loss_char": 0.7476, + "loss_token": 1.7288, + "lr": "2.00e-07", + "norm": 5.8679, + "step": 1688 + }, + { + "acc_char": 0.3007, + "acc_token": 0.6842, + "epoch": 0.03, + "loss_char": 0.7168, + "loss_token": 1.5752, + "lr": "2.00e-07", + "norm": 5.681, + "step": 1689 + }, + { + "acc_char": 0.3368, + "acc_token": 0.7385, + "epoch": 0.03, + "loss_char": 0.5935, + "loss_token": 1.556, + "lr": "2.00e-07", + "norm": 5.8472, + "step": 1690 + }, + { + "acc_char": 0.2926, + "acc_token": 0.6757, + "epoch": 0.03, + "loss_char": 0.7092, + "loss_token": 1.627, + "lr": "2.00e-07", + "norm": 5.8508, + "step": 1691 + }, + { + "acc_char": 0.2891, + "acc_token": 0.6339, + "epoch": 0.03, + "loss_char": 0.8447, + "loss_token": 1.7228, + "lr": "2.00e-07", + "norm": 5.39, + "step": 1692 + }, + { + "acc_char": 0.2962, + "acc_token": 0.6806, + "epoch": 0.03, + "loss_char": 0.7107, + "loss_token": 1.6122, + "lr": "2.00e-07", + "norm": 5.5625, + "step": 1693 + }, + { + "acc_char": 0.2935, + "acc_token": 0.6781, + "epoch": 0.03, + "loss_char": 0.7368, + "loss_token": 1.6023, + "lr": "2.00e-07", + "norm": 5.7586, + "step": 1694 + }, + { + "acc_char": 0.2888, + "acc_token": 0.618, + "epoch": 0.03, + "loss_char": 0.914, + "loss_token": 1.7759, + "lr": "2.00e-07", + "norm": 5.5458, + "step": 1695 + }, + { + "acc_char": 0.2986, + "acc_token": 0.6778, + "epoch": 0.03, + "loss_char": 0.7617, + "loss_token": 1.6123, + "lr": "2.00e-07", + "norm": 5.6041, + "step": 1696 + }, + { + "acc_char": 0.3014, + "acc_token": 0.6804, + "epoch": 0.03, + "loss_char": 0.7763, + "loss_token": 1.5783, + "lr": "2.00e-07", + "norm": 6.4606, + "step": 1697 + }, + { + "acc_char": 0.2949, + "acc_token": 0.6779, + "epoch": 0.03, + "loss_char": 0.7234, + "loss_token": 1.6868, + "lr": "2.00e-07", + "norm": 5.8553, + "step": 1698 + }, + { + "acc_char": 0.3016, + "acc_token": 0.6914, + "epoch": 0.03, + "loss_char": 0.6839, + "loss_token": 1.6502, + "lr": "2.00e-07", + "norm": 5.9714, + "step": 1699 + }, + { + "acc_char": 0.3106, + "acc_token": 0.7059, + "epoch": 0.03, + "loss_char": 0.6671, + "loss_token": 1.5837, + "lr": "2.00e-07", + "norm": 5.6659, + "step": 1700 + }, + { + "acc_char": 0.2735, + "acc_token": 0.6861, + "epoch": 0.03, + "loss_char": 0.6165, + "loss_token": 1.5865, + "lr": "2.00e-07", + "norm": 6.0211, + "step": 1701 + }, + { + "acc_char": 0.3034, + "acc_token": 0.6779, + "epoch": 0.03, + "loss_char": 0.7463, + "loss_token": 1.6613, + "lr": "2.00e-07", + "norm": 5.7788, + "step": 1702 + }, + { + "acc_char": 0.3097, + "acc_token": 0.6844, + "epoch": 0.03, + "loss_char": 0.7017, + "loss_token": 1.6179, + "lr": "2.00e-07", + "norm": 5.996, + "step": 1703 + }, + { + "acc_char": 0.2974, + "acc_token": 0.6822, + "epoch": 0.03, + "loss_char": 0.7026, + "loss_token": 1.7367, + "lr": "2.00e-07", + "norm": 6.6624, + "step": 1704 + }, + { + "acc_char": 0.3138, + "acc_token": 0.6773, + "epoch": 0.03, + "loss_char": 0.7662, + "loss_token": 1.6222, + "lr": "2.00e-07", + "norm": 5.5022, + "step": 1705 + }, + { + "acc_char": 0.2954, + "acc_token": 0.663, + "epoch": 0.03, + "loss_char": 0.7875, + "loss_token": 1.5797, + "lr": "2.00e-07", + "norm": 5.453, + "step": 1706 + }, + { + "acc_char": 0.3159, + "acc_token": 0.6957, + "epoch": 0.03, + "loss_char": 0.7143, + "loss_token": 1.5202, + "lr": "2.00e-07", + "norm": 6.0906, + "step": 1707 + }, + { + "acc_char": 0.314, + "acc_token": 0.7008, + "epoch": 0.03, + "loss_char": 0.6894, + "loss_token": 1.5779, + "lr": "2.00e-07", + "norm": 5.8956, + "step": 1708 + }, + { + "acc_char": 0.3105, + "acc_token": 0.6938, + "epoch": 0.03, + "loss_char": 0.7217, + "loss_token": 1.588, + "lr": "2.00e-07", + "norm": 5.8675, + "step": 1709 + }, + { + "acc_char": 0.3103, + "acc_token": 0.6963, + "epoch": 0.03, + "loss_char": 0.6995, + "loss_token": 1.6254, + "lr": "2.00e-07", + "norm": 5.8286, + "step": 1710 + }, + { + "acc_char": 0.3003, + "acc_token": 0.6787, + "epoch": 0.03, + "loss_char": 0.7472, + "loss_token": 1.6933, + "lr": "2.00e-07", + "norm": 5.8765, + "step": 1711 + }, + { + "acc_char": 0.3151, + "acc_token": 0.7065, + "epoch": 0.03, + "loss_char": 0.667, + "loss_token": 1.536, + "lr": "2.00e-07", + "norm": 5.7391, + "step": 1712 + }, + { + "acc_char": 0.3097, + "acc_token": 0.699, + "epoch": 0.03, + "loss_char": 0.6717, + "loss_token": 1.5737, + "lr": "2.00e-07", + "norm": 5.6138, + "step": 1713 + }, + { + "acc_char": 0.316, + "acc_token": 0.7113, + "epoch": 0.03, + "loss_char": 0.6721, + "loss_token": 1.5796, + "lr": "2.00e-07", + "norm": 5.8443, + "step": 1714 + }, + { + "acc_char": 0.305, + "acc_token": 0.6889, + "epoch": 0.03, + "loss_char": 0.724, + "loss_token": 1.5748, + "lr": "2.00e-07", + "norm": 5.5035, + "step": 1715 + }, + { + "acc_char": 0.3032, + "acc_token": 0.7013, + "epoch": 0.03, + "loss_char": 0.6446, + "loss_token": 1.5459, + "lr": "2.00e-07", + "norm": 5.642, + "step": 1716 + }, + { + "acc_char": 0.2952, + "acc_token": 0.688, + "epoch": 0.03, + "loss_char": 0.6734, + "loss_token": 1.5776, + "lr": "2.00e-07", + "norm": 5.6175, + "step": 1717 + }, + { + "acc_char": 0.2981, + "acc_token": 0.6796, + "epoch": 0.03, + "loss_char": 0.7212, + "loss_token": 1.6105, + "lr": "2.00e-07", + "norm": 6.1429, + "step": 1718 + }, + { + "acc_char": 0.2959, + "acc_token": 0.6775, + "epoch": 0.03, + "loss_char": 0.7264, + "loss_token": 1.6338, + "lr": "2.00e-07", + "norm": 5.5765, + "step": 1719 + }, + { + "acc_char": 0.3074, + "acc_token": 0.6851, + "epoch": 0.03, + "loss_char": 0.7234, + "loss_token": 1.612, + "lr": "2.00e-07", + "norm": 5.7202, + "step": 1720 + }, + { + "acc_char": 0.2906, + "acc_token": 0.6583, + "epoch": 0.03, + "loss_char": 0.8232, + "loss_token": 1.7554, + "lr": "2.00e-07", + "norm": 6.091, + "step": 1721 + }, + { + "acc_char": 0.3159, + "acc_token": 0.7077, + "epoch": 0.03, + "loss_char": 0.6617, + "loss_token": 1.4657, + "lr": "2.00e-07", + "norm": 5.3041, + "step": 1722 + }, + { + "acc_char": 0.2902, + "acc_token": 0.6618, + "epoch": 0.03, + "loss_char": 0.7708, + "loss_token": 1.6204, + "lr": "2.00e-07", + "norm": 5.605, + "step": 1723 + }, + { + "acc_char": 0.3085, + "acc_token": 0.6796, + "epoch": 0.03, + "loss_char": 0.7645, + "loss_token": 1.612, + "lr": "2.00e-07", + "norm": 5.7922, + "step": 1724 + }, + { + "acc_char": 0.2971, + "acc_token": 0.6404, + "epoch": 0.03, + "loss_char": 0.8225, + "loss_token": 1.6631, + "lr": "2.00e-07", + "norm": 5.5715, + "step": 1725 + }, + { + "acc_char": 0.319, + "acc_token": 0.6897, + "epoch": 0.03, + "loss_char": 0.6891, + "loss_token": 1.5501, + "lr": "2.00e-07", + "norm": 5.5443, + "step": 1726 + }, + { + "acc_char": 0.3164, + "acc_token": 0.7073, + "epoch": 0.03, + "loss_char": 0.6799, + "loss_token": 1.592, + "lr": "2.00e-07", + "norm": 5.5647, + "step": 1727 + }, + { + "acc_char": 0.3133, + "acc_token": 0.7051, + "epoch": 0.03, + "loss_char": 0.661, + "loss_token": 1.6084, + "lr": "2.00e-07", + "norm": 5.8548, + "step": 1728 + }, + { + "acc_char": 0.3017, + "acc_token": 0.6912, + "epoch": 0.03, + "loss_char": 0.694, + "loss_token": 1.7379, + "lr": "2.00e-07", + "norm": 6.0479, + "step": 1729 + }, + { + "acc_char": 0.3022, + "acc_token": 0.6878, + "epoch": 0.03, + "loss_char": 0.699, + "loss_token": 1.6654, + "lr": "2.00e-07", + "norm": 5.7858, + "step": 1730 + }, + { + "acc_char": 0.3061, + "acc_token": 0.6872, + "epoch": 0.03, + "loss_char": 0.7225, + "loss_token": 1.5633, + "lr": "2.00e-07", + "norm": 5.5348, + "step": 1731 + }, + { + "acc_char": 0.2839, + "acc_token": 0.6558, + "epoch": 0.03, + "loss_char": 0.7938, + "loss_token": 1.6926, + "lr": "2.00e-07", + "norm": 6.0785, + "step": 1732 + }, + { + "acc_char": 0.2934, + "acc_token": 0.6419, + "epoch": 0.03, + "loss_char": 0.8494, + "loss_token": 1.7951, + "lr": "2.00e-07", + "norm": 5.7669, + "step": 1733 + }, + { + "acc_char": 0.2941, + "acc_token": 0.6791, + "epoch": 0.03, + "loss_char": 0.7158, + "loss_token": 1.6679, + "lr": "2.00e-07", + "norm": 5.7, + "step": 1734 + }, + { + "acc_char": 0.3115, + "acc_token": 0.6936, + "epoch": 0.03, + "loss_char": 0.7035, + "loss_token": 1.5711, + "lr": "2.00e-07", + "norm": 5.7193, + "step": 1735 + }, + { + "acc_char": 0.3071, + "acc_token": 0.7071, + "epoch": 0.03, + "loss_char": 0.6319, + "loss_token": 1.519, + "lr": "2.00e-07", + "norm": 5.7064, + "step": 1736 + }, + { + "acc_char": 0.3034, + "acc_token": 0.6798, + "epoch": 0.03, + "loss_char": 0.7287, + "loss_token": 1.5719, + "lr": "2.00e-07", + "norm": 5.7227, + "step": 1737 + }, + { + "acc_char": 0.2943, + "acc_token": 0.6784, + "epoch": 0.03, + "loss_char": 0.728, + "loss_token": 1.6606, + "lr": "2.00e-07", + "norm": 5.7986, + "step": 1738 + }, + { + "acc_char": 0.3183, + "acc_token": 0.6948, + "epoch": 0.03, + "loss_char": 0.7256, + "loss_token": 1.5931, + "lr": "2.00e-07", + "norm": 5.6669, + "step": 1739 + }, + { + "acc_char": 0.3087, + "acc_token": 0.6671, + "epoch": 0.03, + "loss_char": 0.78, + "loss_token": 1.7101, + "lr": "2.00e-07", + "norm": 5.8135, + "step": 1740 + }, + { + "acc_char": 0.3024, + "acc_token": 0.6934, + "epoch": 0.03, + "loss_char": 0.6551, + "loss_token": 1.5156, + "lr": "2.00e-07", + "norm": 6.3682, + "step": 1741 + }, + { + "acc_char": 0.3054, + "acc_token": 0.6736, + "epoch": 0.03, + "loss_char": 0.7574, + "loss_token": 1.5606, + "lr": "2.00e-07", + "norm": 5.8203, + "step": 1742 + }, + { + "acc_char": 0.307, + "acc_token": 0.6946, + "epoch": 0.03, + "loss_char": 0.6862, + "loss_token": 1.5647, + "lr": "2.00e-07", + "norm": 5.7305, + "step": 1743 + }, + { + "acc_char": 0.2871, + "acc_token": 0.6231, + "epoch": 0.03, + "loss_char": 0.884, + "loss_token": 1.819, + "lr": "2.00e-07", + "norm": 5.9354, + "step": 1744 + }, + { + "acc_char": 0.2858, + "acc_token": 0.6322, + "epoch": 0.03, + "loss_char": 0.8419, + "loss_token": 1.6682, + "lr": "2.00e-07", + "norm": 5.5717, + "step": 1745 + }, + { + "acc_char": 0.2926, + "acc_token": 0.6643, + "epoch": 0.03, + "loss_char": 0.7732, + "loss_token": 1.718, + "lr": "2.00e-07", + "norm": 5.7846, + "step": 1746 + }, + { + "acc_char": 0.2998, + "acc_token": 0.6812, + "epoch": 0.03, + "loss_char": 0.7343, + "loss_token": 1.6174, + "lr": "2.00e-07", + "norm": 5.7623, + "step": 1747 + }, + { + "acc_char": 0.2984, + "acc_token": 0.6937, + "epoch": 0.03, + "loss_char": 0.6639, + "loss_token": 1.5517, + "lr": "2.00e-07", + "norm": 5.6115, + "step": 1748 + }, + { + "acc_char": 0.3132, + "acc_token": 0.6951, + "epoch": 0.03, + "loss_char": 0.708, + "loss_token": 1.5821, + "lr": "2.00e-07", + "norm": 5.8594, + "step": 1749 + }, + { + "acc_char": 0.2966, + "acc_token": 0.6765, + "epoch": 0.03, + "loss_char": 0.7249, + "loss_token": 1.6193, + "lr": "2.00e-07", + "norm": 5.9276, + "step": 1750 + }, + { + "acc_char": 0.2956, + "acc_token": 0.6666, + "epoch": 0.03, + "loss_char": 0.7695, + "loss_token": 1.6098, + "lr": "2.00e-07", + "norm": 5.9244, + "step": 1751 + }, + { + "acc_char": 0.2856, + "acc_token": 0.6745, + "epoch": 0.03, + "loss_char": 0.7153, + "loss_token": 1.6257, + "lr": "2.00e-07", + "norm": 6.2479, + "step": 1752 + }, + { + "acc_char": 0.3144, + "acc_token": 0.716, + "epoch": 0.03, + "loss_char": 0.6248, + "loss_token": 1.4773, + "lr": "2.00e-07", + "norm": 5.328, + "step": 1753 + }, + { + "acc_char": 0.2995, + "acc_token": 0.6832, + "epoch": 0.03, + "loss_char": 0.706, + "loss_token": 1.6197, + "lr": "2.00e-07", + "norm": 5.9709, + "step": 1754 + }, + { + "acc_char": 0.2874, + "acc_token": 0.6328, + "epoch": 0.03, + "loss_char": 0.8373, + "loss_token": 1.7249, + "lr": "2.00e-07", + "norm": 5.7176, + "step": 1755 + }, + { + "acc_char": 0.2957, + "acc_token": 0.6702, + "epoch": 0.03, + "loss_char": 0.7423, + "loss_token": 1.6452, + "lr": "2.00e-07", + "norm": 5.7704, + "step": 1756 + }, + { + "acc_char": 0.2989, + "acc_token": 0.6776, + "epoch": 0.03, + "loss_char": 0.7342, + "loss_token": 1.6687, + "lr": "2.00e-07", + "norm": 5.7341, + "step": 1757 + }, + { + "acc_char": 0.3055, + "acc_token": 0.7035, + "epoch": 0.03, + "loss_char": 0.6279, + "loss_token": 1.5377, + "lr": "2.00e-07", + "norm": 5.5632, + "step": 1758 + }, + { + "acc_char": 0.2955, + "acc_token": 0.6865, + "epoch": 0.03, + "loss_char": 0.6719, + "loss_token": 1.6061, + "lr": "2.00e-07", + "norm": 5.8618, + "step": 1759 + }, + { + "acc_char": 0.2821, + "acc_token": 0.6584, + "epoch": 0.03, + "loss_char": 0.7972, + "loss_token": 1.7979, + "lr": "1.99e-07", + "norm": 6.072, + "step": 1760 + }, + { + "acc_char": 0.3088, + "acc_token": 0.6928, + "epoch": 0.03, + "loss_char": 0.6977, + "loss_token": 1.5895, + "lr": "1.99e-07", + "norm": 6.2279, + "step": 1761 + }, + { + "acc_char": 0.3033, + "acc_token": 0.6922, + "epoch": 0.03, + "loss_char": 0.6863, + "loss_token": 1.6013, + "lr": "1.99e-07", + "norm": 5.6444, + "step": 1762 + }, + { + "acc_char": 0.2986, + "acc_token": 0.6751, + "epoch": 0.03, + "loss_char": 0.7423, + "loss_token": 1.6667, + "lr": "1.99e-07", + "norm": 5.7379, + "step": 1763 + }, + { + "acc_char": 0.297, + "acc_token": 0.6774, + "epoch": 0.03, + "loss_char": 0.7249, + "loss_token": 1.6749, + "lr": "1.99e-07", + "norm": 6.1533, + "step": 1764 + }, + { + "acc_char": 0.2982, + "acc_token": 0.6805, + "epoch": 0.03, + "loss_char": 0.7077, + "loss_token": 1.5948, + "lr": "1.99e-07", + "norm": 5.8114, + "step": 1765 + }, + { + "acc_char": 0.3067, + "acc_token": 0.6859, + "epoch": 0.03, + "loss_char": 0.7297, + "loss_token": 1.6133, + "lr": "1.99e-07", + "norm": 5.5519, + "step": 1766 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6767, + "epoch": 0.03, + "loss_char": 0.7591, + "loss_token": 1.6389, + "lr": "1.99e-07", + "norm": 5.7607, + "step": 1767 + }, + { + "acc_char": 0.3089, + "acc_token": 0.7008, + "epoch": 0.03, + "loss_char": 0.6708, + "loss_token": 1.6271, + "lr": "1.99e-07", + "norm": 5.9782, + "step": 1768 + }, + { + "acc_char": 0.2963, + "acc_token": 0.6712, + "epoch": 0.03, + "loss_char": 0.7529, + "loss_token": 1.6849, + "lr": "1.99e-07", + "norm": 6.1694, + "step": 1769 + }, + { + "acc_char": 0.2967, + "acc_token": 0.6822, + "epoch": 0.03, + "loss_char": 0.7043, + "loss_token": 1.525, + "lr": "1.99e-07", + "norm": 5.579, + "step": 1770 + }, + { + "acc_char": 0.3024, + "acc_token": 0.6872, + "epoch": 0.03, + "loss_char": 0.7056, + "loss_token": 1.5849, + "lr": "1.99e-07", + "norm": 5.9316, + "step": 1771 + }, + { + "acc_char": 0.3063, + "acc_token": 0.6997, + "epoch": 0.03, + "loss_char": 0.6743, + "loss_token": 1.5539, + "lr": "1.99e-07", + "norm": 5.6747, + "step": 1772 + }, + { + "acc_char": 0.3042, + "acc_token": 0.679, + "epoch": 0.03, + "loss_char": 0.7526, + "loss_token": 1.6509, + "lr": "1.99e-07", + "norm": 5.8773, + "step": 1773 + }, + { + "acc_char": 0.3031, + "acc_token": 0.6836, + "epoch": 0.03, + "loss_char": 0.7267, + "loss_token": 1.6467, + "lr": "1.99e-07", + "norm": 5.8952, + "step": 1774 + }, + { + "acc_char": 0.3017, + "acc_token": 0.6808, + "epoch": 0.03, + "loss_char": 0.7203, + "loss_token": 1.6047, + "lr": "1.99e-07", + "norm": 5.8058, + "step": 1775 + }, + { + "acc_char": 0.303, + "acc_token": 0.6802, + "epoch": 0.03, + "loss_char": 0.7312, + "loss_token": 1.6448, + "lr": "1.99e-07", + "norm": 6.1805, + "step": 1776 + }, + { + "acc_char": 0.3019, + "acc_token": 0.6815, + "epoch": 0.03, + "loss_char": 0.7149, + "loss_token": 1.6754, + "lr": "1.99e-07", + "norm": 6.1277, + "step": 1777 + }, + { + "acc_char": 0.2831, + "acc_token": 0.6658, + "epoch": 0.03, + "loss_char": 0.7458, + "loss_token": 1.6643, + "lr": "1.99e-07", + "norm": 5.6969, + "step": 1778 + }, + { + "acc_char": 0.3091, + "acc_token": 0.6912, + "epoch": 0.03, + "loss_char": 0.7104, + "loss_token": 1.5921, + "lr": "1.99e-07", + "norm": 6.0038, + "step": 1779 + }, + { + "acc_char": 0.2966, + "acc_token": 0.6718, + "epoch": 0.03, + "loss_char": 0.7499, + "loss_token": 1.6861, + "lr": "1.99e-07", + "norm": 5.9677, + "step": 1780 + }, + { + "acc_char": 0.2973, + "acc_token": 0.6715, + "epoch": 0.03, + "loss_char": 0.7577, + "loss_token": 1.675, + "lr": "1.99e-07", + "norm": 5.8496, + "step": 1781 + }, + { + "acc_char": 0.3088, + "acc_token": 0.6957, + "epoch": 0.03, + "loss_char": 0.6928, + "loss_token": 1.4824, + "lr": "1.99e-07", + "norm": 5.617, + "step": 1782 + }, + { + "acc_char": 0.3093, + "acc_token": 0.6729, + "epoch": 0.03, + "loss_char": 0.7746, + "loss_token": 1.6802, + "lr": "1.99e-07", + "norm": 5.9807, + "step": 1783 + }, + { + "acc_char": 0.3068, + "acc_token": 0.6768, + "epoch": 0.03, + "loss_char": 0.7679, + "loss_token": 1.6124, + "lr": "1.99e-07", + "norm": 5.9877, + "step": 1784 + }, + { + "acc_char": 0.3244, + "acc_token": 0.7239, + "epoch": 0.03, + "loss_char": 0.6387, + "loss_token": 1.4429, + "lr": "1.99e-07", + "norm": 5.3048, + "step": 1785 + }, + { + "acc_char": 0.3015, + "acc_token": 0.6626, + "epoch": 0.03, + "loss_char": 0.8047, + "loss_token": 1.6889, + "lr": "1.99e-07", + "norm": 5.7797, + "step": 1786 + }, + { + "acc_char": 0.2968, + "acc_token": 0.6674, + "epoch": 0.03, + "loss_char": 0.7591, + "loss_token": 1.6156, + "lr": "1.99e-07", + "norm": 5.6749, + "step": 1787 + }, + { + "acc_char": 0.2884, + "acc_token": 0.6264, + "epoch": 0.03, + "loss_char": 0.8927, + "loss_token": 1.7878, + "lr": "1.99e-07", + "norm": 5.9712, + "step": 1788 + }, + { + "acc_char": 0.3074, + "acc_token": 0.7018, + "epoch": 0.03, + "loss_char": 0.6909, + "loss_token": 1.5815, + "lr": "1.99e-07", + "norm": 5.7451, + "step": 1789 + }, + { + "acc_char": 0.2977, + "acc_token": 0.6778, + "epoch": 0.03, + "loss_char": 0.7358, + "loss_token": 1.6576, + "lr": "1.99e-07", + "norm": 6.4111, + "step": 1790 + }, + { + "acc_char": 0.3044, + "acc_token": 0.6762, + "epoch": 0.03, + "loss_char": 0.7493, + "loss_token": 1.6757, + "lr": "1.99e-07", + "norm": 5.9979, + "step": 1791 + }, + { + "acc_char": 0.2726, + "acc_token": 0.5943, + "epoch": 0.03, + "loss_char": 0.9682, + "loss_token": 1.8345, + "lr": "1.99e-07", + "norm": 5.644, + "step": 1792 + }, + { + "acc_char": 0.3091, + "acc_token": 0.7004, + "epoch": 0.03, + "loss_char": 0.6841, + "loss_token": 1.5326, + "lr": "1.99e-07", + "norm": 5.5105, + "step": 1793 + }, + { + "acc_char": 0.3115, + "acc_token": 0.7085, + "epoch": 0.03, + "loss_char": 0.6296, + "loss_token": 1.5025, + "lr": "1.99e-07", + "norm": 5.435, + "step": 1794 + }, + { + "acc_char": 0.2962, + "acc_token": 0.6761, + "epoch": 0.03, + "loss_char": 0.7292, + "loss_token": 1.6037, + "lr": "1.99e-07", + "norm": 5.7251, + "step": 1795 + }, + { + "acc_char": 0.3096, + "acc_token": 0.7037, + "epoch": 0.03, + "loss_char": 0.662, + "loss_token": 1.6494, + "lr": "1.99e-07", + "norm": 6.0807, + "step": 1796 + }, + { + "acc_char": 0.3063, + "acc_token": 0.6889, + "epoch": 0.03, + "loss_char": 0.7193, + "loss_token": 1.5482, + "lr": "1.99e-07", + "norm": 5.6579, + "step": 1797 + }, + { + "acc_char": 0.2851, + "acc_token": 0.6324, + "epoch": 0.03, + "loss_char": 0.8989, + "loss_token": 1.8132, + "lr": "1.99e-07", + "norm": 5.7828, + "step": 1798 + }, + { + "acc_char": 0.3053, + "acc_token": 0.6844, + "epoch": 0.03, + "loss_char": 0.7401, + "loss_token": 1.6022, + "lr": "1.99e-07", + "norm": 5.6144, + "step": 1799 + }, + { + "acc_char": 0.3087, + "acc_token": 0.69, + "epoch": 0.03, + "loss_char": 0.7005, + "loss_token": 1.538, + "lr": "1.99e-07", + "norm": 5.6592, + "step": 1800 + } + ], + "logging_steps": 1.0, + "max_steps": 54626, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "total_flos": 93278636605440.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}