File size: 9,709 Bytes
e1c1753
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
from dreamcoder.utilities import eprint
import random


class DefaultTaskBatcher:
        """Iterates through task batches of the specified size, in list order.

        Successive iterations take consecutive windows of ``taskBatchSize``
        tasks, wrapping around to the start of the list when a window runs
        past the end. Defaults to all tasks if taskBatchSize is None.
        """

        def __init__(self):
                pass

        def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
                """Return the window of tasks belonging to iteration ``currIteration``.

                Args:
                    ec_result: Not used by this batcher.
                    tasks: List of all tasks (concatenated with itself to wrap around).
                    taskBatchSize: Number of tasks per batch, or None for all tasks.
                    currIteration: Iteration index; iteration 0 starts at the first task.

                Returns:
                    A list of ``taskBatchSize`` tasks, or an empty list when
                    there are no tasks to batch.

                Raises:
                    AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
                """
                if taskBatchSize is None:
                        taskBatchSize = len(tasks)
                elif taskBatchSize > len(tasks):
                        message = "Task batch size is greater than total number of tasks, aborting."
                        eprint(message)
                        # Raised explicitly instead of `assert False`, which is
                        # stripped when Python runs with -O.
                        raise AssertionError(message)

                if len(tasks) == 0:
                        # Nothing to batch; also avoids a modulo by zero below.
                        return []

                start = (taskBatchSize * currIteration) % len(tasks)
                end = start + taskBatchSize
                return (tasks + tasks)[start:end]  # Doubled list handles wraparound.

class RandomTaskBatcher:
        """Returns a randomly sampled task batch of the specified size.

        Sampling is without replacement and uses the module-level ``random``
        state. Defaults to all tasks if taskBatchSize is None.
        """

        def __init__(self):
                pass

        def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
                """Return ``taskBatchSize`` tasks sampled without replacement.

                Args:
                    ec_result: Not used by this batcher.
                    tasks: Sequence of all tasks.
                    taskBatchSize: Number of tasks to sample, or None for all tasks
                        (which yields a random permutation of ``tasks``).
                    currIteration: Not used by this batcher.

                Returns:
                    A new list of sampled tasks.

                Raises:
                    AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
                """
                if taskBatchSize is None:
                        taskBatchSize = len(tasks)
                elif taskBatchSize > len(tasks):
                        message = "Task batch size is greater than total number of tasks, aborting."
                        eprint(message)
                        # Raised explicitly instead of `assert False`, which is
                        # stripped when Python runs with -O.
                        raise AssertionError(message)

                return random.sample(tasks, taskBatchSize)

class RandomShuffleTaskBatcher:
        """Shuffles the tasks, then iterates through batches like DefaultTaskBatcher.

        The shuffle is seeded with ``baseSeed + epoch`` so every epoch gets its
        own, reproducible ordering. Reshuffles across epochs - intended as a
        benchmark comparison to test the task ordering.
        """
        def __init__(self, baseSeed=0): self.baseSeed = baseSeed

        def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
                """Return the batch for ``currIteration`` from the epoch's shuffled order.

                Args:
                    ec_result: Not used by this batcher.
                    tasks: List of all tasks; it is copied, never shuffled in place.
                    taskBatchSize: Number of tasks per batch, or None for all tasks.
                    currIteration: Iteration index; iteration 0 starts epoch 0.

                Returns:
                    A list of distinct tasks in shuffled order. A batch that
                    wraps into the next epoch may be shorter than
                    ``taskBatchSize`` because tasks appearing on both sides of
                    the epoch boundary are kept only once.

                Raises:
                    AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
                """
                if taskBatchSize is None:
                        taskBatchSize = len(tasks)
                elif taskBatchSize > len(tasks):
                        message = "Task batch size is greater than total number of tasks, aborting."
                        eprint(message)
                        # Raised explicitly instead of `assert False`, which is
                        # stripped when Python runs with -O.
                        raise AssertionError(message)

                numTasks = len(tasks)
                if numTasks == 0:
                        # Nothing to batch; also avoids dividing by zero below.
                        return []

                # Reshuffles tasks in a fixed way across epochs for reproducibility.
                currEpoch = (currIteration * taskBatchSize) // numTasks

                shuffledTasks = list(tasks)  # Copy, since shuffle works in place.
                random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

                # The next epoch's ordering, used when the window crosses the epoch boundary.
                shuffledTasksWrap = list(tasks)
                random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

                start = (taskBatchSize * currIteration) % numTasks
                end = start + taskBatchSize
                taskBatch = (shuffledTasks + shuffledTasksWrap)[start:end]  # Wraparound nicely.

                # De-duplicate while keeping the shuffled order. Going through
                # set() would return the tasks in hash order and discard the
                # seeded ordering this class exists to provide.
                return list(dict.fromkeys(taskBatch))

class UnsolvedTaskBatcher:
        """Keeps proposing every task until it has been counted as solved twice.

        A per-task solve count is stored on the instance and refreshed from
        ``ec_result.allFrontiers`` on each call to getTaskBatch.
        """

        def __init__(self):
                self.start = 0
                # task -> number of calls in which its frontier was non-empty
                self.timesSolved = {}

        def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
                """Update the solve counts and return the tasks counted fewer than two times.

                ``taskBatchSize`` must be None; ``currIteration`` is not used.
                """
                assert taskBatchSize is None, "This batching strategy does not support batch sizes"

                solveCounts = self.timesSolved
                for task, frontier in ec_result.allFrontiers.items():
                        if not frontier.empty:
                                # Non-empty frontier: count one more solve.
                                solveCounts[task] = 1 + solveCounts.get(task, 0)
                        else:
                                # Empty frontier: make sure the task has an entry.
                                solveCounts[task] = max(0, solveCounts.get(task, 0))

                remaining = []
                for task in tasks:
                        if solveCounts.get(task, 0) < 2:
                                remaining.append(task)
                return remaining
        
def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
        """Pick a batch mixing the lowest-entropy tasks with randomly chosen ones.

        ``int(randomRatio * taskBatchSize)`` slots are filled by random sampling
        and the rest by the tasks with the smallest values reported by
        ``ec_result.recognitionModel.taskGrammarEntropies(tasks)``.

        Args:
            ec_result: Object whose ``recognitionModel`` provides
                ``taskGrammarEntropies``; the result is consumed through
                ``.items()`` as (task, entropy) pairs.
            tasks: Candidate tasks.
            taskBatchSize: Requested total batch size.
            randomRatio: Fraction of the batch to choose at random.

        Returns:
            A list with the lowest-entropy tasks first (ascending entropy),
            followed by the randomly sampled ones. It is shorter than
            ``taskBatchSize`` when there are not enough candidates.
        """
        numRandom = int(randomRatio * taskBatchSize)
        numEntropy = taskBatchSize - numRandom

        eprint("Selecting top %d tasks from the %d overall tasks given lowest entropy." % (taskBatchSize, len(tasks)))
        eprint("Will be selecting %d by lowest entropy and %d randomly." %(numEntropy, numRandom))
        taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(tasks)
        sortedEntropies = sorted(taskGrammarEntropies.items(), key=lambda x:x[1])
        rankedTasks = [task for (task, entropy) in sortedEntropies]

        entropyBatch = rankedTasks[:numEntropy]
        remainingTasks = rankedTasks[numEntropy:]
        # random.sample raises ValueError when asked for more items than the
        # population holds, so never request more than what is left.
        randomBatch = random.sample(remainingTasks, min(numRandom, len(remainingTasks)))

        return entropyBatch + randomBatch

def kNearestNeighbors(ec_result, tasks, k, task):
        """Finds the k nearest neighbors in the recognition model logProduction space to a given task.

        Args:
            ec_result: Object whose ``recognitionModel`` provides
                ``grammarLogProductionDistanceToTask(task, tasks)``.
            tasks: List of candidate tasks.
            k: Maximum number of neighbors to return.
            task: The task whose neighbors are wanted.

        Returns:
            A list of up to ``k`` elements of ``tasks`` (the original objects),
            ordered from the highest score to the lowest.
        """
        import numpy as np
        # NOTE(review): assumes the returned scores form a 1-D array aligned
        # index-for-index with `tasks` - confirm against the recognition model.
        cosDistance = ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)
        argSort = np.argsort(-cosDistance)# Want the greatest similarity.
        topK = argSort[:k]
        # Index the list directly instead of going through np.array(tasks):
        # NumPy would coerce the tasks (sequence-like tasks turn into a
        # multi-dimensional array, strings into np.str_), so callers would not
        # get their own task objects back.
        return [tasks[index] for index in topK]


class RandomkNNTaskBatcher:
        """Chooses a random task and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits.

        Falls back on plain random sampling when ``ec_result`` has no
        recognition model.
        """
        def __init__(self):
                pass

        def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
                """Return a random seed task followed by its nearest neighbors.

                Args:
                    ec_result: Object exposing ``recognitionModel`` (may be None).
                    tasks: Sequence of all tasks.
                    taskBatchSize: Requested batch size, or None for ``len(tasks)``.
                    currIteration: Not used by this batcher.

                Returns:
                    A list starting with the randomly chosen task, followed by
                    the result of ``kNearestNeighbors`` over all of ``tasks``;
                    a random sample when there is no recognition model; an
                    empty list when the batch size is 0.

                Raises:
                    AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
                """
                if taskBatchSize is None:
                        taskBatchSize = len(tasks)
                elif taskBatchSize > len(tasks):
                        message = "Task batch size is greater than total number of tasks, aborting."
                        eprint(message)
                        # Raised explicitly instead of `assert False`, which is
                        # stripped when Python runs with -O.
                        raise AssertionError(message)

                if taskBatchSize == 0:
                        # Covers an empty task list (random.choice would raise
                        # IndexError) and an explicit size of 0, where k = -1
                        # would slice off almost the whole task list.
                        return []

                if ec_result.recognitionModel is None:
                        eprint("No recognition model, falling back on random %d" % taskBatchSize)
                        return random.sample(tasks, taskBatchSize)
                else:
                        randomTask = random.choice(tasks)
                        # NOTE(review): neighbors are searched among all tasks, so
                        # the seed task may show up again in kNN - confirm.
                        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
                        return [randomTask] + kNN

class RandomLowEntropykNNTaskBatcher:
        """Choose a random task from the taskBatchSize unsolved tasks with the lowest entropy,
           and find its (taskBatchSize - 1) nearest neighbors using the recognition model logits.

           A task counts as unsolved while its frontier in ec_result.allFrontiers is empty."""
        def __init__(self):
                pass

        def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
                """Return a low-entropy unsolved seed task followed by its nearest neighbors.

                Args:
                    ec_result: Object exposing ``allFrontiers`` (indexed by task) and
                        ``recognitionModel`` (may be None).
                    tasks: Sequence of all tasks.
                    taskBatchSize: Requested batch size, or None to get every
                        unsolved task.
                    currIteration: Not used by this batcher.

                Returns:
                    All unsolved tasks when ``taskBatchSize`` is None; an empty
                    list when the size is 0 or nothing is unsolved; a random
                    sample of unsolved tasks when there is no recognition
                    model; otherwise the seed task followed by its neighbors,
                    which are searched among all of ``tasks``.

                Raises:
                    AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
                """
                unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

                if taskBatchSize is None:
                        return unsolvedTasks
                elif taskBatchSize > len(tasks):
                        message = "Task batch size is greater than total number of tasks, aborting."
                        eprint(message)
                        # Raised explicitly instead of `assert False`, which is
                        # stripped when Python runs with -O.
                        raise AssertionError(message)

                if taskBatchSize == 0 or not unsolvedTasks:
                        # No seed task can be drawn; random.choice / random.sample
                        # would raise on the empty pool.
                        return []

                if ec_result.recognitionModel is None:
                        eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
                        # The size check above is against all tasks, so the unsolved
                        # pool may be smaller than the requested batch.
                        return random.sample(unsolvedTasks, min(taskBatchSize, len(unsolvedTasks)))
                else:
                        lowEntropyUnsolved = entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
                        randomTask = random.choice(lowEntropyUnsolved)
                        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
                        return [randomTask] + kNN


class UnsolvedEntropyTaskBatcher:
        """Returns tasks whose frontier in ec_result.allFrontiers is still empty (unsolved).
           Given a task batch size, returns the unsolved tasks with the lowest entropy."""
        def __init__(self):
                pass

        def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
                """Return up to ``taskBatchSize`` unsolved tasks, lowest entropy first.

                Args:
                    ec_result: Object exposing ``allFrontiers`` (indexed by task) and
                        ``recognitionModel`` (may be None).
                    tasks: Sequence of all tasks.
                    taskBatchSize: Requested batch size, or None to get every
                        unsolved task.
                    currIteration: Not used by this batcher.

                Returns:
                    All unsolved tasks when ``taskBatchSize`` is None; an empty
                    list when nothing is unsolved; a random sample of unsolved
                    tasks when there is no recognition model; otherwise the
                    result of ``entropyRandomBatch`` with ``randomRatio=0``.

                Raises:
                    AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
                """
                unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

                if taskBatchSize is None:
                        return unsolvedTasks
                elif taskBatchSize > len(tasks):
                        message = "Task batch size is greater than total number of tasks, aborting."
                        eprint(message)
                        # Raised explicitly instead of `assert False`, which is
                        # stripped when Python runs with -O.
                        raise AssertionError(message)

                if not unsolvedTasks:
                        # Everything is solved: there is nothing left to select.
                        return []

                if ec_result.recognitionModel is None:
                        eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
                        # The size check above is against all tasks, so the unsolved
                        # pool may be smaller than the requested batch.
                        return random.sample(unsolvedTasks, min(taskBatchSize, len(unsolvedTasks)))
                else:
                        return entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)

class UnsolvedRandomEntropyTaskBatcher:
        """Returns tasks whose frontier in ec_result.allFrontiers is still empty (unsolved).
           Given a task batch size, returns a mix of unsolved tasks: half of the
           batch (randomRatio=.5) selected randomly and the remainder selected
           by lowest entropy."""
        def __init__(self):
                pass

        def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
                """Return up to ``taskBatchSize`` unsolved tasks, half by entropy and half random.

                Args:
                    ec_result: Object exposing ``allFrontiers`` (indexed by task) and
                        ``recognitionModel`` (may be None).
                    tasks: Sequence of all tasks.
                    taskBatchSize: Requested batch size, or None to get every
                        unsolved task.
                    currIteration: Not used by this batcher.

                Returns:
                    All unsolved tasks when ``taskBatchSize`` is None; an empty
                    list when nothing is unsolved; a random sample of unsolved
                    tasks when there is no recognition model; otherwise the
                    result of ``entropyRandomBatch`` with ``randomRatio=.5``.
                    The batch is capped at the number of unsolved tasks.

                Raises:
                    AssertionError: If ``taskBatchSize`` exceeds ``len(tasks)``.
                """
                unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

                if taskBatchSize is None:
                        return unsolvedTasks
                elif taskBatchSize > len(tasks):
                        message = "Task batch size is greater than total number of tasks, aborting."
                        eprint(message)
                        # Raised explicitly instead of `assert False`, which is
                        # stripped when Python runs with -O.
                        raise AssertionError(message)

                if not unsolvedTasks:
                        # Everything is solved: there is nothing left to select.
                        return []

                # The size check above is against all tasks, so the unsolved pool
                # may be smaller than the requested batch; sampling more than the
                # pool holds would raise ValueError.
                effectiveBatchSize = min(taskBatchSize, len(unsolvedTasks))

                if ec_result.recognitionModel is None:
                        eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
                        return random.sample(unsolvedTasks, effectiveBatchSize)
                else:
                        return entropyRandomBatch(ec_result, unsolvedTasks, effectiveBatchSize, randomRatio=.5)