updated model
Browse files
- my_model.zip +2 -2
- reward_wrapper.py +4 -1
my_model.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f6f53bbf5b346674f37ce2eb18bbe4ce2eafe9b934ac9c44df2f289e1efa2c1
|
3 |
+
size 6980981
|
reward_wrapper.py
CHANGED
@@ -19,7 +19,7 @@ class CustomRewardWrapper(gym.Wrapper):
|
|
19 |
heights = obs["heights"]
|
20 |
|
21 |
# # Default reward
|
22 |
-
reward =
|
23 |
# # reward = ((self.height - max(heights)) / self.height)
|
24 |
# # reward += np.sum(board)
|
25 |
# reward = (self.height - max(heights)) / self.height
|
@@ -37,6 +37,9 @@ class CustomRewardWrapper(gym.Wrapper):
|
|
37 |
#
|
38 |
# # Penalty for big differences between columns
|
39 |
reward -= self.get_bumpiness_and_height(board)[0] / self.height
|
|
|
|
|
|
|
40 |
#
|
41 |
# # Penalty for holes
|
42 |
# # holes_val = 0
|
|
|
19 |
heights = obs["heights"]
|
20 |
|
21 |
# # Default reward
|
22 |
+
reward = 1
|
23 |
# # reward = ((self.height - max(heights)) / self.height)
|
24 |
# # reward += np.sum(board)
|
25 |
# reward = (self.height - max(heights)) / self.height
|
|
|
37 |
#
|
38 |
# # Penalty for big differences between columns
|
39 |
reward -= self.get_bumpiness_and_height(board)[0] / self.height
|
40 |
+
|
41 |
+
# Penalty for high columns
|
42 |
+
reward += (self.height / 2 ) - np.max(heights) * 0.5
|
43 |
#
|
44 |
# # Penalty for holes
|
45 |
# # holes_val = 0
|