aiflows
/

FunSearchFlowModule

Model card Files Files and versions Community

FunSearchFlowModule / ProgramDBFlowModule /artifacts /function.py

nbaldwin

first version FunSearch

97e363b about 1 year ago

raw

history blame contribute delete

4.91 kB

	"""
	This code is an implementation of FunSearch (https://www.nature.com/articles/s41586-023-06924-6) and is heavily inspired by the original code (https://github.com/google-deepmind/funsearch).

	Citation:

	@Article{FunSearch2023,
	author = {Romera-Paredes, Bernardino and Barekatain, Mohammadamin and Novikov, Alexander and Balog, Matej and Kumar, M. Pawan and Dupont, Emilien and Ruiz, Francisco J. R. and Ellenberg, Jordan and Wang, Pengming and Fawzi, Omar and Kohli, Pushmeet and Fawzi, Alhussein},
	journal = {Nature},
	title = {Mathematical discoveries from program search with large language models},
	year = {2023},
	doi = {10.1038/s41586-023-06924-6}
	}
	"""

	from . import AbstractArtifact
	import dataclasses
	import tokenize
	import io
	from collections.abc import Iterator, MutableSet, Sequence

	@dataclasses.dataclass
	class FunctionArtifact(AbstractArtifact):
	    """Artifact describing a single Python function.

	    The methods below read ``name``, ``args``, ``return_type``,
	    ``docstring`` and ``body`` from the instance. These attributes are not
	    declared in this class; presumably they are fields inherited from
	    ``AbstractArtifact`` (TODO confirm against the base class).
	    """

	    def __str__(self) -> str:
	        """Render the artifact as Python source code.

	        Returns:
	            The ``def`` header line, the docstring (when there is one) and
	            the body, terminated by a blank line.
	        """
	        return_type = f' -> {self.return_type}' if self.return_type else ''
	        function = f'def {self.name}({self.args}){return_type}:\n'

	        if self.docstring:
	            # self.docstring is already indented on every line except the
	            # first one, so only the opening quotes need an indent. Reuse the
	            # indentation of the first non-blank body line so the docstring
	            # and the body form one consistent block; fall back to two
	            # spaces when the body gives no hint.
	            indent = '  '
	            for line in (self.body or '').splitlines():
	                if line.strip():
	                    indent = line[:len(line) - len(line.lstrip())] or indent
	                    break
	            new_line = '\n' if self.body else ''
	            function += f'{indent}"""{self.docstring}"""{new_line}'

	        # self.body is already indented.
	        function += self.body + '\n\n'
	        return function

	    @staticmethod
	    def _tokenize(code: str) -> Iterator[tokenize.TokenInfo]:
	        """Transform `code` into a stream of Python tokens."""
	        # tokenize.tokenize works on a bytes `readline` callable.
	        return tokenize.tokenize(io.BytesIO(code.encode()).readline)

	    @staticmethod
	    def _untokenize(tokens: Sequence[tokenize.TokenInfo]) -> str:
	        """Transform a list of Python tokens back into source code."""
	        return tokenize.untokenize(tokens).decode()

	    def _get_artifacts_called(self) -> MutableSet[str]:
	        """Return the names of everything called inside `self.body`."""
	        code = str(self.body)
	        return {
	            token.string
	            for token, is_call in self._yield_token_and_is_call(code)
	            if is_call
	        }

	    def calls_ancestor(self, artifact_to_evolve: str) -> bool:
	        """Return whether the body calls another version of the function.

	        Args:
	            artifact_to_evolve: Base name (without the version suffix) of the
	                function being evolved.

	        Returns:
	            True if some called name starts with ``{artifact_to_evolve}_v``
	            and does not start with this artifact's own ``name``.
	        """
	        # Versioned functions are named `<artifact_to_evolve>_v<N>`, so any
	        # call with that prefix targets some version of the function. Names
	        # that start with this artifact's own name are not counted.
	        ancestor_prefix = f'{artifact_to_evolve}_v'
	        return any(
	            name.startswith(ancestor_prefix) and not name.startswith(self.name)
	            for name in self._get_artifacts_called()
	        )

	    @classmethod
	    def _yield_token_and_is_call(
	        cls, code: str
	    ) -> Iterator[tuple[tokenize.TokenInfo, bool]]:
	        """Yield each token with a bool telling whether it is a function call.

	        A token is flagged as a call when it is a NAME immediately followed
	        by ``(`` and is not preceded by ``.`` (attribute access). The
	        tokenizer reports keywords as NAME tokens too, so e.g. ``if (`` is
	        flagged as well.
	        """
	        prev_token = None
	        is_attribute_access = False
	        for token in cls._tokenize(code):
	            if (
	                prev_token  # If the previous token exists and
	                and prev_token.type == tokenize.NAME  # it is an identifier
	                and token.type == tokenize.OP  # and this one is a delimiter
	                and token.string == "("  # and in particular it is '('.
	            ):
	                yield prev_token, not is_attribute_access
	                is_attribute_access = False
	            elif prev_token:
	                # Remember whether the token emitted here is a '.', so that
	                # the name following it is treated as an attribute.
	                is_attribute_access = (
	                    prev_token.type == tokenize.OP and prev_token.string == '.'
	                )
	                yield prev_token, False
	            prev_token = token

	        # The loop always lags one token behind; flush the last one.
	        if prev_token:
	            yield prev_token, False

	    def rename_artifact_calls(self, source_name: str, target_name: str) -> str:
	        """Return the rendered function with calls to `source_name` renamed.

	        The code is produced by ``str(self)``, so the name on the ``def``
	        line takes part in the renaming too (it is a NAME followed by ``(``).

	        Args:
	            source_name: Name of the called function to replace.
	            target_name: Name to use instead.

	        Returns:
	            The source code of this artifact after renaming. The artifact
	            itself is not modified.
	        """
	        implementation = str(self)

	        # Cheap substring check to skip tokenizing when nothing can match.
	        if source_name not in implementation:
	            return implementation

	        modified_tokens = [
	            # Only the token text changes; type and positions are kept.
	            token._replace(string=target_name)
	            if is_call and token.string == source_name
	            else token
	            for token, is_call in self._yield_token_and_is_call(implementation)
	        ]
	        return self._untokenize(modified_tokens)