explcre commited on
Commit
12cd09d
·
verified ·
1 Parent(s): fde9deb

Upload exp_phase8_grpo_1000step_seed2_20260503_140432/log.jsonl with huggingface_hub

Browse files
exp_phase8_grpo_1000step_seed2_20260503_140432/log.jsonl ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {"step": 1, "elapsed_s": 58.317341327667236, "wallclock_s_per_step": 58.317342042922974, "loss/total": -3.8743019104003906e-07, "loss/per_rollout_mean": -3.862660378217697e-07, "loss/per_rollout_std": 0.9999613680619437, "reward/total_mean": -0.07291112684723389, "reward/total_std": 0.07289778177188626, "reward/total_min": -0.1477397859096527, "reward/total_max": 0.057197732248531, "reward/argmax_match": 0.0, "reward/oracle_target_sigmoid": 5.563563798304835e-08, "reward/oracle_offtarget_sigmoid": 0.5347038488835096, "reward/motif_hit_frac_topK": 0.21875, "reward/r_argmax_term": 0.0, "reward/r_target_term": 2.7817818991524175e-08, "reward/r_motif_term": 0.08750000000000001, "reward/r_offtarget_term": -0.16041115466505287, "advantage/mean": 3.862660378217697e-07, "advantage/std": 0.9999613680619437, "advantage/abs_mean": 0.8153493229765445, "ratio/mean": 1.0, "ratio/std": 0.0, "ratio/clip_frac_low": 0.0, "ratio/clip_frac_high": 0.0, "kl/mean": 0.0, "kl/abs_mean": 0.0, "reward/cell_Ex": -0.07291112684723389}
2
+ {"step": 2, "elapsed_s": 112.53261685371399, "wallclock_s_per_step": 56.26630878448486, "loss/total": 5.774945020675659e-05, "loss/per_rollout_mean": 5.774945020675659e-05, "loss/per_rollout_std": 0.9998438988460229, "reward/total_mean": -0.08329700500242154, "reward/total_std": 0.05416146549519637, "reward/total_min": -0.19049959182739257, "reward/total_max": -4.895301754004321e-06, "reward/argmax_match": 0.0, "reward/oracle_target_sigmoid": 6.345443438249276e-06, "reward/oracle_offtarget_sigmoid": 0.4235005924138022, "reward/motif_hit_frac_topK": 0.109375, "reward/r_argmax_term": 0.0, "reward/r_target_term": 3.172721719124638e-06, "reward/r_motif_term": 0.043750000000000004, "reward/r_offtarget_term": -0.12705017772414068, "advantage/mean": 3.725290298461914e-08, "advantage/std": 0.9999744501021766, "advantage/abs_mean": 0.8918892815709114, "ratio/mean": 0.9999891109764576, "ratio/std": 0.0003456798048853372, "ratio/clip_frac_low": 0.0, "ratio/clip_frac_high": 0.0, "kl/mean": -2.2172927856445312e-05, "kl/abs_mean": 0.00027760863304138184, "reward/cell_Ex": -0.08329700500242154}
3
+ {"step": 3, "elapsed_s": 168.09378743171692, "wallclock_s_per_step": 56.031262715657554, "loss/total": 0.00582575798034668, "loss/per_rollout_mean": 0.005825743079185486, "loss/per_rollout_std": 0.9910956827306042, "reward/total_mean": 0.06108934645202169, "reward/total_std": 0.3608098072294752, "reward/total_min": -0.14801170825958251, "reward/total_max": 1.0055396400537575, "reward/argmax_match": 0.125, "reward/oracle_target_sigmoid": 0.0014587080281387443, "reward/oracle_offtarget_sigmoid": 0.44463335854015895, "reward/motif_hit_frac_topK": 0.171875, "reward/r_argmax_term": 0.125, "reward/r_target_term": 0.0007293540140693722, "reward/r_motif_term": 0.06875, "reward/r_offtarget_term": -0.13339000756204766, "advantage/mean": -1.4156103134155273e-07, "advantage/std": 0.9999780842898107, "advantage/abs_mean": 0.9094296470284462, "ratio/mean": 0.9978741426020861, "ratio/std": 0.008762968879669833, "ratio/clip_frac_low": 0.0, "ratio/clip_frac_high": 0.0, "kl/mean": -0.002640083432197571, "kl/abs_mean": 0.004226610064506531, "reward/cell_Ex": 0.06108934645202169}
4
+ {"step": 4, "elapsed_s": 224.68239784240723, "wallclock_s_per_step": 56.17059963941574, "loss/total": 0.0030153393745422363, "loss/per_rollout_mean": 0.0030153393745422363, "loss/per_rollout_std": 0.9982684034977369, "reward/total_mean": 0.3202526703756807, "reward/total_std": 0.5557062991782528, "reward/total_min": -0.06750819384574475, "reward/total_max": 1.4743141101236688, "reward/argmax_match": 0.25, "reward/oracle_target_sigmoid": 0.11862077226624392, "reward/oracle_offtarget_sigmoid": 0.13019238585813753, "reward/motif_hit_frac_topK": 0.125, "reward/r_argmax_term": 0.25, "reward/r_target_term": 0.05931038613312196, "reward/r_motif_term": 0.05, "reward/r_offtarget_term": -0.03905771575744126, "advantage/mean": -4.0978193283081055e-08, "advantage/std": 0.9999886509108379, "advantage/abs_mean": 0.9510276056826115, "ratio/mean": 0.9989411192946136, "ratio/std": 0.005130232129590081, "ratio/clip_frac_low": 0.0, "ratio/clip_frac_high": 0.0, "kl/mean": -0.0012934207916259766, "kl/abs_mean": 0.004678964614868164, "reward/cell_Ex": 0.3202526703756807}