Spaces:

ycwhencpp
/

final-iteration

Paused

vaibhavkhandare committed 13 days ago

Commit

ad5d3b3

verified ·

1 Parent(s): 3326716

Upload folder using huggingface_hub

Files changed (3) hide show

run-output/plots/training_log.csv CHANGED Viewed

@@ -1,5 +1,5 @@
 round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
-1,3.154,4.348,2.316,0.3017,0.647,38,2.6893
-2,2.855,3.71,2.249,0.2058,0.5125,37,2.554
-3,2.969,4.14,2.25,0.2041,0.4656,45,2.0757
-4,3.294,4.527,2.32,0.2793,0.5243,45,1.9805

 round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
+1,3.463,4.232,2.793,0.3947,0.6341,44,2.4064
+2,3.072,3.802,2.25,0.2737,0.5068,48,2.434
+3,3.469,3.956,2.979,0.3738,0.574,41,2.4042
+4,3.316,3.517,3.073,0.3453,0.4575,47,2.4202

run-output/plots/training_summary.json CHANGED Viewed

@@ -4,14 +4,14 @@
   "rounds": 4,
   "episodes_per_round": 6,
   "before": {
-    "monthly_engage": 0.5642,
-    "monthly_strategic": 0.5903,
-    "monthly_competitive": 0.8313
   },
   "after": {
-    "monthly_engage": 0.1071,
-    "monthly_strategic": 0.3174,
-    "monthly_competitive": 0.5233
   },
   "smart_heuristic": {
     "monthly_engage": 0.7352,
@@ -19,9 +19,9 @@
     "monthly_competitive": 0.9066
   },
   "improvement": {
-    "monthly_engage": -0.45710000000000006,
-    "monthly_strategic": -0.27290000000000003,
-    "monthly_competitive": -0.30800000000000005
   },
   "training_log": {
     "round": [
@@ -31,46 +31,46 @@
       4
     ],
     "avg_episode_reward": [
-      3.154,
-      2.855,
-      2.969,
-      3.294
     ],
     "max_episode_reward": [
-      4.348,
-      3.71,
-      4.14,
-      4.527
     ],
     "min_episode_reward": [
-      2.316,
-      2.249,
       2.25,
-      2.32
     ],
     "avg_grader": [
-      0.3017,
-      0.2058,
-      0.2041,
-      0.2793
     ],
     "max_grader": [
-      0.647,
-      0.5125,
-      0.4656,
-      0.5243
     ],
     "n_training_samples": [
-      38,
-      37,
-      45,
-      45
     ],
     "train_loss": [
-      2.6893,
-      2.554,
-      2.0757,
-      1.9805
     ]
   }
 }

   "rounds": 4,
   "episodes_per_round": 6,
   "before": {
+    "monthly_engage": 0.0,
+    "monthly_strategic": 0.174,
+    "monthly_competitive": 0.028
   },
   "after": {
+    "monthly_engage": 0.0,
+    "monthly_strategic": 0.1744,
+    "monthly_competitive": 0.028
   },
   "smart_heuristic": {
     "monthly_engage": 0.7352,
     "monthly_competitive": 0.9066
   },
   "improvement": {
+    "monthly_engage": 0.0,
+    "monthly_strategic": 0.00040000000000001146,
+    "monthly_competitive": 0.0
   },
   "training_log": {
     "round": [
       4
     ],
     "avg_episode_reward": [
+      3.463,
+      3.072,
+      3.469,
+      3.316
     ],
     "max_episode_reward": [
+      4.232,
+      3.802,
+      3.956,
+      3.517
     ],
     "min_episode_reward": [
+      2.793,
       2.25,
+      2.979,
+      3.073
     ],
     "avg_grader": [
+      0.3947,
+      0.2737,
+      0.3738,
+      0.3453
     ],
     "max_grader": [
+      0.6341,
+      0.5068,
+      0.574,
+      0.4575
     ],
     "n_training_samples": [
+      44,
+      48,
+      41,
+      47
     ],
     "train_loss": [
+      2.4064,
+      2.434,
+      2.4042,
+      2.4202
     ]
   }
 }

run-output/training/train_grpo.executed.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff