Fraser-Greenlee

add dreamcoder codebase

e1c1753 almost 3 years ago

9.71 kB

	from dreamcoder.utilities import eprint
	import random


	class DefaultTaskBatcher:
	    """Iterates through task batches of the specified size.

	    The batch for iteration ``i`` starts at ``(taskBatchSize * i) % len(tasks)``
	    and wraps around to the front of ``tasks`` when it runs past the end.
	    Defaults to all tasks if taskBatchSize is None.
	    """

	    def __init__(self):
	        pass

	    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
	        """Return the slice of ``tasks`` that belongs to ``currIteration``.

	        ``ec_result`` is accepted for interface parity with the other batchers
	        and is not read here.

	        Raises:
	            AssertionError: if ``taskBatchSize`` exceeds ``len(tasks)``.
	        """
	        numTasks = len(tasks)
	        if taskBatchSize is None:
	            taskBatchSize = numTasks
	        elif taskBatchSize > numTasks:
	            message = "Task batch size is greater than total number of tasks, aborting."
	            eprint(message)
	            # Raised explicitly rather than via `assert False` so the check is
	            # not stripped when Python runs with -O.
	            raise AssertionError(message)

	        if numTasks == 0:
	            # Nothing to batch; also avoids a modulo-by-zero below.
	            return []

	        start = (taskBatchSize * currIteration) % numTasks
	        end = start + taskBatchSize
	        # Doubling the sequence lets a plain slice handle wraparound.
	        return (tasks + tasks)[start:end]

	class RandomTaskBatcher:
	    """Returns a randomly sampled task batch of the specified size.

	    Sampling is without replacement via the module-level ``random`` state.
	    Defaults to all tasks if taskBatchSize is None.
	    """

	    def __init__(self):
	        pass

	    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
	        """Return ``taskBatchSize`` tasks drawn at random from ``tasks``.

	        ``ec_result`` and ``currIteration`` are not read by this strategy.

	        Raises:
	            AssertionError: if ``taskBatchSize`` exceeds ``len(tasks)``.
	        """
	        if taskBatchSize is None:
	            taskBatchSize = len(tasks)
	        elif taskBatchSize > len(tasks):
	            message = "Task batch size is greater than total number of tasks, aborting."
	            eprint(message)
	            # Raised explicitly rather than via `assert False` so the check is
	            # not stripped when Python runs with -O.
	            raise AssertionError(message)

	        return random.sample(tasks, taskBatchSize)

	class RandomShuffleTaskBatcher:
	    """Randomly shuffles the tasks first, and then iterates through task batches
	    of the specified size like DefaultTaskBatcher.

	    Reshuffles across epochs - intended as benchmark comparison to test the
	    task ordering. The shuffle for an epoch is seeded with
	    ``baseSeed + epoch``, so a given (baseSeed, iteration) pair always yields
	    the same batch in the same order.
	    """

	    def __init__(self, baseSeed=0):
	        self.baseSeed = baseSeed

	    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
	        """Return the batch for ``currIteration`` from the epoch's shuffled order.

	        A batch that runs past the end of the current epoch's ordering is
	        completed from the start of the next epoch's ordering; tasks that would
	        then appear twice are kept only at their first position.

	        ``ec_result`` is not read by this strategy.

	        Raises:
	            AssertionError: if ``taskBatchSize`` exceeds ``len(tasks)``.
	        """
	        numTasks = len(tasks)
	        if taskBatchSize is None:
	            taskBatchSize = numTasks
	        elif taskBatchSize > numTasks:
	            message = "Task batch size is greater than total number of tasks, aborting."
	            eprint(message)
	            # Raised explicitly rather than via `assert False` so the check is
	            # not stripped when Python runs with -O.
	            raise AssertionError(message)

	        if numTasks == 0:
	            # Nothing to batch; also avoids division by zero below.
	            return []

	        # Reshuffles tasks in a fixed way across epochs for reproducibility.
	        # Floor division keeps the epoch exact for arbitrarily large counts.
	        currEpoch = (currIteration * taskBatchSize) // numTasks

	        # Shuffle copies, since shuffle works in place and the caller's
	        # sequence must stay untouched.
	        shuffledTasks = list(tasks)
	        random.Random(self.baseSeed + currEpoch).shuffle(shuffledTasks)

	        shuffledTasksWrap = list(tasks)
	        random.Random(self.baseSeed + currEpoch + 1).shuffle(shuffledTasksWrap)

	        start = (taskBatchSize * currIteration) % numTasks
	        end = start + taskBatchSize
	        taskBatch = (shuffledTasks + shuffledTasksWrap)[start:end]  # Wraparound nicely.

	        # Drop duplicates introduced by the wraparound while keeping the seeded
	        # order; a plain set() would reorder the batch by hash value.
	        return list(dict.fromkeys(taskBatch))

	class UnsolvedTaskBatcher:
	    """Returns only the tasks whose recorded solve count is still below two.

	    Each call to getTaskBatch bumps the count of every task whose frontier in
	    ``ec_result.allFrontiers`` is non-empty, then filters ``tasks`` by that count.
	    """

	    def __init__(self):
	        self.start = 0
	        # Maps each task to the number of calls in which its frontier was non-empty.
	        self.timesSolved = {}

	    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
	        """Update the solve counts from ``ec_result`` and return the tasks seen
	        solved fewer than two times, in the order they appear in ``tasks``.

	        ``taskBatchSize`` must be None; ``currIteration`` is not read.
	        """
	        assert taskBatchSize is None, "This batching strategy does not support batch sizes"

	        solveCounts = self.timesSolved
	        for task, frontier in ec_result.allFrontiers.items():
	            previous = solveCounts.get(task, 0)
	            if frontier.empty:
	                # Still record the task so it shows up in the table.
	                solveCounts[task] = max(0, previous)
	            else:
	                solveCounts[task] = 1 + previous

	        batch = []
	        for task in tasks:
	            if solveCounts.get(task, 0) < 2:
	                batch.append(task)
	        return batch

	def entropyRandomBatch(ec_result, tasks, taskBatchSize, randomRatio):
	    """Build a batch that mixes lowest-entropy tasks with randomly drawn ones.

	    ``int(randomRatio * taskBatchSize)`` slots are filled at random and the rest
	    by ascending value of ``ec_result.recognitionModel.taskGrammarEntropies(tasks)``.
	    The random slots are drawn only from tasks not already taken by the entropy
	    slots, so the batch has no duplicates.

	    If fewer tasks are available than requested, the batch is simply shorter
	    instead of raising from ``random.sample``.

	    NOTE(review): assumes taskGrammarEntropies returns a mapping from task to a
	    sortable entropy value - confirm against the recognition model.
	    """
	    numRandom = int(randomRatio * taskBatchSize)
	    numEntropy = taskBatchSize - numRandom

	    eprint("Selecting top %d tasks from the %d overall tasks given lowest entropy." % (taskBatchSize, len(tasks)))
	    eprint("Will be selecting %d by lowest entropy and %d randomly." %(numEntropy, numRandom))
	    taskGrammarEntropies = ec_result.recognitionModel.taskGrammarEntropies(tasks)
	    rankedTasks = [
	        task
	        for task, _entropy in sorted(taskGrammarEntropies.items(), key=lambda item: item[1])
	    ]

	    entropyBatch = rankedTasks[:numEntropy]
	    remainder = rankedTasks[numEntropy:]
	    # Clamp the draw: sampling more items than remain raises ValueError.
	    randomBatch = random.sample(remainder, min(numRandom, len(remainder)))

	    return entropyBatch + randomBatch

	def kNearestNeighbors(ec_result, tasks, k, task):
	    """Finds the k nearest neighbors in the recognition model logProduction space to a given task.

	    Scores come from
	    ``ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)``;
	    the ``k`` entries of ``tasks`` with the largest scores are returned, highest
	    score first.

	    NOTE(review): presumably the scores are similarities aligned index-for-index
	    with ``tasks`` - confirm against the recognition model.
	    """
	    import numpy as np
	    cosDistance = ec_result.recognitionModel.grammarLogProductionDistanceToTask(task, tasks)
	    argSort = np.argsort(-cosDistance)  # Want the greatest similarity.
	    topK = argSort[:k]
	    # Index the original sequence directly rather than going through
	    # np.array(tasks), which would coerce the tasks (strings become numpy
	    # scalars, sequence-like tasks get unpacked into a 2-D array).
	    return [tasks[index] for index in topK]


	class RandomkNNTaskBatcher:
	    """Chooses a random task and finds the (taskBatchSize - 1) nearest neighbors using the recognition model logits.

	    Falls back to a uniformly random batch when ``ec_result`` has no
	    recognition model. Defaults to all tasks if taskBatchSize is None.
	    """

	    def __init__(self):
	        pass

	    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
	        """Return a seed task followed by its nearest neighbours among ``tasks``.

	        ``currIteration`` is not read by this strategy.

	        NOTE(review): the neighbour search runs over all of ``tasks``, including
	        the seed task, so the seed may appear twice in the batch - confirm
	        whether that is intended.

	        Raises:
	            AssertionError: if ``taskBatchSize`` exceeds ``len(tasks)``.
	        """
	        if taskBatchSize is None:
	            taskBatchSize = len(tasks)
	        elif taskBatchSize > len(tasks):
	            message = "Task batch size is greater than total number of tasks, aborting."
	            eprint(message)
	            # Raised explicitly rather than via `assert False` so the check is
	            # not stripped when Python runs with -O.
	            raise AssertionError(message)

	        if ec_result.recognitionModel is None:
	            eprint("No recognition model, falling back on random %d" % taskBatchSize)
	            return random.sample(tasks, taskBatchSize)

	        if not tasks:
	            # random.choice raises IndexError on an empty sequence.
	            return []

	        randomTask = random.choice(tasks)
	        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
	        return [randomTask] + kNN

	class RandomLowEntropykNNTaskBatcher:
	    """Chooses a random task from the taskBatchSize unsolved tasks with the lowest
	    entropy, and finds the (taskBatchSize - 1) nearest neighbors using the
	    recognition model logits.

	    A task counts as unsolved when ``ec_result.allFrontiers[task].empty`` is true.
	    Returns every unsolved task if taskBatchSize is None.
	    """

	    def __init__(self):
	        pass

	    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
	        """Return a low-entropy unsolved seed task plus its nearest neighbours.

	        The neighbours are searched among all of ``tasks``, solved or not.
	        Without a recognition model the batch is a random sample of the
	        unsolved tasks, shortened if fewer than ``taskBatchSize`` remain.
	        Returns an empty list once no task is unsolved.
	        ``currIteration`` is not read by this strategy.

	        Raises:
	            AssertionError: if ``taskBatchSize`` exceeds ``len(tasks)``.
	        """
	        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

	        if taskBatchSize is None:
	            return unsolvedTasks
	        elif taskBatchSize > len(tasks):
	            message = "Task batch size is greater than total number of tasks, aborting."
	            eprint(message)
	            # Raised explicitly rather than via `assert False` so the check is
	            # not stripped when Python runs with -O.
	            raise AssertionError(message)

	        if not unsolvedTasks:
	            # No seed task can be drawn; mirrors the empty result of the
	            # taskBatchSize=None path instead of raising from random.choice.
	            return []

	        if ec_result.recognitionModel is None:
	            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
	            # The size was validated against all tasks, so clamp it to the
	            # unsolved ones to keep random.sample from raising.
	            return random.sample(unsolvedTasks, min(taskBatchSize, len(unsolvedTasks)))

	        lowEntropyUnsolved = entropyRandomBatch(ec_result, unsolvedTasks, taskBatchSize, randomRatio=0)
	        randomTask = random.choice(lowEntropyUnsolved)
	        kNN = kNearestNeighbors(ec_result, tasks, taskBatchSize - 1, randomTask)
	        return [randomTask] + kNN


	class UnsolvedEntropyTaskBatcher:
	    """Returns tasks whose frontier in ``ec_result.allFrontiers`` is empty.

	    Given a task batch size, returns the unsolved tasks with the lowest entropy.
	    Returns every unsolved task if taskBatchSize is None.
	    """

	    def __init__(self):
	        pass

	    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
	        """Return up to ``taskBatchSize`` unsolved tasks, lowest entropy first.

	        Without a recognition model the batch is a random sample of the
	        unsolved tasks instead. In both cases the batch is shorter than
	        ``taskBatchSize`` when fewer unsolved tasks remain, and empty once
	        none remain. ``currIteration`` is not read by this strategy.

	        Raises:
	            AssertionError: if ``taskBatchSize`` exceeds ``len(tasks)``.
	        """
	        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

	        if taskBatchSize is None:
	            return unsolvedTasks
	        elif taskBatchSize > len(tasks):
	            message = "Task batch size is greater than total number of tasks, aborting."
	            eprint(message)
	            # Raised explicitly rather than via `assert False` so the check is
	            # not stripped when Python runs with -O.
	            raise AssertionError(message)

	        if not unsolvedTasks:
	            # Mirrors the empty result of the taskBatchSize=None path.
	            return []

	        # The size was validated against all tasks; clamp it to the unsolved
	        # ones so random.sample cannot be asked for more than exist.
	        batchSize = min(taskBatchSize, len(unsolvedTasks))

	        if ec_result.recognitionModel is None:
	            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
	            return random.sample(unsolvedTasks, batchSize)

	        return entropyRandomBatch(ec_result, unsolvedTasks, batchSize, randomRatio=0)

	class UnsolvedRandomEntropyTaskBatcher:
	    """Returns tasks whose frontier in ``ec_result.allFrontiers`` is empty.

	    Given a task batch size, returns a mix of unsolved tasks: half of the batch
	    (``randomRatio=.5``, rounded down) is selected randomly and the remainder
	    by lowest entropy. Returns every unsolved task if taskBatchSize is None.
	    """

	    def __init__(self):
	        pass

	    def getTaskBatch(self, ec_result, tasks, taskBatchSize, currIteration):
	        """Return up to ``taskBatchSize`` unsolved tasks, part random, part low entropy.

	        Without a recognition model the whole batch is a random sample of the
	        unsolved tasks. In both cases the batch is shorter than
	        ``taskBatchSize`` when fewer unsolved tasks remain, and empty once
	        none remain. ``currIteration`` is not read by this strategy.

	        Raises:
	            AssertionError: if ``taskBatchSize`` exceeds ``len(tasks)``.
	        """
	        unsolvedTasks = [t for t in tasks if ec_result.allFrontiers[t].empty]

	        if taskBatchSize is None:
	            return unsolvedTasks
	        elif taskBatchSize > len(tasks):
	            message = "Task batch size is greater than total number of tasks, aborting."
	            eprint(message)
	            # Raised explicitly rather than via `assert False` so the check is
	            # not stripped when Python runs with -O.
	            raise AssertionError(message)

	        if not unsolvedTasks:
	            # Mirrors the empty result of the taskBatchSize=None path.
	            return []

	        # The size was validated against all tasks; clamp it to the unsolved
	        # ones so neither sampling step is asked for more tasks than exist.
	        batchSize = min(taskBatchSize, len(unsolvedTasks))

	        if ec_result.recognitionModel is None:
	            eprint("No recognition model, falling back on random %d tasks from the remaining %d" %(taskBatchSize, len(unsolvedTasks)))
	            return random.sample(unsolvedTasks, batchSize)

	        return entropyRandomBatch(ec_result, unsolvedTasks, batchSize, randomRatio=.5)