vaibhavkhandare commited on
Commit
ad5d3b3
·
verified ·
1 Parent(s): 3326716

Upload folder using huggingface_hub

Browse files
run-output/plots/training_log.csv CHANGED
@@ -1,5 +1,5 @@
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
- 1,3.154,4.348,2.316,0.3017,0.647,38,2.6893
3
- 2,2.855,3.71,2.249,0.2058,0.5125,37,2.554
4
- 3,2.969,4.14,2.25,0.2041,0.4656,45,2.0757
5
- 4,3.294,4.527,2.32,0.2793,0.5243,45,1.9805
 
1
  round,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss
2
+ 1,3.463,4.232,2.793,0.3947,0.6341,44,2.4064
3
+ 2,3.072,3.802,2.25,0.2737,0.5068,48,2.434
4
+ 3,3.469,3.956,2.979,0.3738,0.574,41,2.4042
5
+ 4,3.316,3.517,3.073,0.3453,0.4575,47,2.4202
run-output/plots/training_summary.json CHANGED
@@ -4,14 +4,14 @@
4
  "rounds": 4,
5
  "episodes_per_round": 6,
6
  "before": {
7
- "monthly_engage": 0.5642,
8
- "monthly_strategic": 0.5903,
9
- "monthly_competitive": 0.8313
10
  },
11
  "after": {
12
- "monthly_engage": 0.1071,
13
- "monthly_strategic": 0.3174,
14
- "monthly_competitive": 0.5233
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
@@ -19,9 +19,9 @@
19
  "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
- "monthly_engage": -0.45710000000000006,
23
- "monthly_strategic": -0.27290000000000003,
24
- "monthly_competitive": -0.30800000000000005
25
  },
26
  "training_log": {
27
  "round": [
@@ -31,46 +31,46 @@
31
  4
32
  ],
33
  "avg_episode_reward": [
34
- 3.154,
35
- 2.855,
36
- 2.969,
37
- 3.294
38
  ],
39
  "max_episode_reward": [
40
- 4.348,
41
- 3.71,
42
- 4.14,
43
- 4.527
44
  ],
45
  "min_episode_reward": [
46
- 2.316,
47
- 2.249,
48
  2.25,
49
- 2.32
 
50
  ],
51
  "avg_grader": [
52
- 0.3017,
53
- 0.2058,
54
- 0.2041,
55
- 0.2793
56
  ],
57
  "max_grader": [
58
- 0.647,
59
- 0.5125,
60
- 0.4656,
61
- 0.5243
62
  ],
63
  "n_training_samples": [
64
- 38,
65
- 37,
66
- 45,
67
- 45
68
  ],
69
  "train_loss": [
70
- 2.6893,
71
- 2.554,
72
- 2.0757,
73
- 1.9805
74
  ]
75
  }
76
  }
 
4
  "rounds": 4,
5
  "episodes_per_round": 6,
6
  "before": {
7
+ "monthly_engage": 0.0,
8
+ "monthly_strategic": 0.174,
9
+ "monthly_competitive": 0.028
10
  },
11
  "after": {
12
+ "monthly_engage": 0.0,
13
+ "monthly_strategic": 0.1744,
14
+ "monthly_competitive": 0.028
15
  },
16
  "smart_heuristic": {
17
  "monthly_engage": 0.7352,
 
19
  "monthly_competitive": 0.9066
20
  },
21
  "improvement": {
22
+ "monthly_engage": 0.0,
23
+ "monthly_strategic": 0.00040000000000001146,
24
+ "monthly_competitive": 0.0
25
  },
26
  "training_log": {
27
  "round": [
 
31
  4
32
  ],
33
  "avg_episode_reward": [
34
+ 3.463,
35
+ 3.072,
36
+ 3.469,
37
+ 3.316
38
  ],
39
  "max_episode_reward": [
40
+ 4.232,
41
+ 3.802,
42
+ 3.956,
43
+ 3.517
44
  ],
45
  "min_episode_reward": [
46
+ 2.793,
 
47
  2.25,
48
+ 2.979,
49
+ 3.073
50
  ],
51
  "avg_grader": [
52
+ 0.3947,
53
+ 0.2737,
54
+ 0.3738,
55
+ 0.3453
56
  ],
57
  "max_grader": [
58
+ 0.6341,
59
+ 0.5068,
60
+ 0.574,
61
+ 0.4575
62
  ],
63
  "n_training_samples": [
64
+ 44,
65
+ 48,
66
+ 41,
67
+ 47
68
  ],
69
  "train_loss": [
70
+ 2.4064,
71
+ 2.434,
72
+ 2.4042,
73
+ 2.4202
74
  ]
75
  }
76
  }
run-output/training/train_grpo.executed.ipynb CHANGED
The diff for this file is too large to render. See raw diff