File size: 2,375 Bytes
0b8359d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

"""Tests for test_tasks."""

import numpy as np
import tensorflow as tf

from single_task import misc  # brain coder
from single_task import test_tasks  # brain coder


def get_reward(reward_fn, candidate):
  """Returns the summed episode rewards that `reward_fn` gives `candidate`.

  Args:
    reward_fn: Callable invoked with the value produced by
        `misc.bf_tokens_to_string(candidate)`. Its result must expose an
        iterable `episode_rewards` attribute.
    candidate: Token sequence handed to `misc.bf_tokens_to_string`.

  Returns:
    The sum over `episode_rewards` of the `reward_fn` result.
  """
  program = misc.bf_tokens_to_string(candidate)
  outcome = reward_fn(program)
  return sum(outcome.episode_rewards)


class TestTasksTest(tf.test.TestCase):
  """Reward checks for `test_tasks.HillClimbingTask`."""

  def testHillClimbingTask(self):
    """Checks summed rewards for fixed token sequences against known values.

    A single reward function is taken from a `BasicTaskManager` wrapping
    `HillClimbingTask`; every candidate below is scored with that same
    function, in order.
    """
    task = test_tasks.BasicTaskManager(test_tasks.HillClimbingTask())
    reward_fns = task.rl_batch(1)
    reward_fn = reward_fns[0]

    # (candidate token sequence, expected summed episode reward).
    cases = [
        ([1, 2, 0], 8 / 12.),
        ([1, 2, 2, 0], 11 / 12.),
        ([1, 2, 3, 0], 1.0),
        ([1, 2, 3, 4, 5, 2, 0], 1. + 8 / 12.),
        ([1, 2, 3, 4, 5, 6, 0], 2.0),
        ([1, 2, 3, 4, 5, 6, 1, 8, 3, 0], 3.0),
        ([1, 2, 3, 4, 5, 6, 7, 8, 7, 0], 3.0),
        ([1, 2, 3, 4, 5, 6, 1, 8, 3, 1, 0], 3.0 - 4 / 12.),
        ([1, 2, 3, 4, 5, 6, 1, 8, 3, 1, 1, 1, 1, 0], 2.0),
        ([1, 2, 3, 4, 5, 6, 7, 8, 7, 3, 0], 3.0 + 1 / 12.),
        ([1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1,
          8, 5, 1, 6, 4, 2, 1, 8, 3, 0],
         8.0),
        ([1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3, 2, 1,
          8, 5, 1, 6, 4, 2, 1, 8, 3, 1, 1, 0],
         8.0 - 8 / 12.),
        ([1, 2, 3, 4, 5, 6, 7, 8, 7, 6, 5, 4, 3,
          2, 1, 8, 5, 1, 6, 4, 2, 1, 8, 3, 1, 1,
          1, 1, 1, 1, 1, 0],
         7.0),
    ]

    for candidate, expected in cases:
      actual = get_reward(reward_fn, candidate)
      # Same np.isclose tolerance as before; the message identifies the
      # failing case instead of a bare "False is not true".
      self.assertTrue(
          np.isclose(actual, expected),
          msg='candidate %r: got reward %r, expected %r' % (
              candidate, actual, expected))


# Run the tests through TensorFlow's test runner when executed as a script.
if __name__ == '__main__':
  tf.test.main()