marci0929 committed on
Commit
715111b
1 Parent(s): 13bec41

updated model

Browse files
Files changed (2) hide show
  1. my_model.zip +2 -2
  2. reward_wrapper.py +4 -1
my_model.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2f023b0292ff0d225d43e005826d45ce4e0f24ef202bbc1ba08e6f1960ffcc8
3
- size 2400942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f6f53bbf5b346674f37ce2eb18bbe4ce2eafe9b934ac9c44df2f289e1efa2c1
3
+ size 6980981
reward_wrapper.py CHANGED
@@ -19,7 +19,7 @@ class CustomRewardWrapper(gym.Wrapper):
19
  heights = obs["heights"]
20
 
21
  # # Default reward
22
- reward = 2
23
  # # reward = ((self.height - max(heights)) / self.height)
24
  # # reward += np.sum(board)
25
  # reward = (self.height - max(heights)) / self.height
@@ -37,6 +37,9 @@ class CustomRewardWrapper(gym.Wrapper):
37
  #
38
  # # Penalty for big differences between columns
39
  reward -= self.get_bumpiness_and_height(board)[0] / self.height
 
 
 
40
  #
41
  # # Penalty for holes
42
  # # holes_val = 0
 
19
  heights = obs["heights"]
20
 
21
  # # Default reward
22
+ reward = 1
23
  # # reward = ((self.height - max(heights)) / self.height)
24
  # # reward += np.sum(board)
25
  # reward = (self.height - max(heights)) / self.height
 
37
  #
38
  # # Penalty for big differences between columns
39
  reward -= self.get_bumpiness_and_height(board)[0] / self.height
40
+
41
+ # Penalty for high columns
42
+ reward += (self.height / 2 ) - np.max(heights) * 0.5
43
  #
44
  # # Penalty for holes
45
  # # holes_val = 0