Corianas
/

llama-tiny-reactor

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

llama-tiny-reactor / README.md

Corianas's picture

Create README.md

d864f8e verified 8 months ago

|

3.06 kB

	---
	license: apache-2.0
	language:
	- en
	---
	This is a ReAct-style model: it works in Thought → Action → Observation steps.

	The source dataset was parsed with:
	```python
	import re


	def extract_trajectory_info(data):
	    """
	    Extract the question, thoughts, actions, and observations from one data entry.

	    The trajectory is scanned for numbered steps of the form ``Thought N: ...``,
	    ``Action N: ...`` and ``Observation N: ...``. Each step's text runs up to the
	    next occurrence of the following label word (or the end of the string), so
	    step text that itself contains that bare word is cut short at that point.

	    Parameters:
	    data (dict): The data entry containing the 'question' and 'trajectory' fields.
	        A missing or ``None`` trajectory is treated as an empty string.

	    Returns:
	    dict: A dictionary with the 'question' string and the lists 'thoughts',
	        'actions' and 'observations', each stripped of surrounding whitespace.
	    """
	    question = data.get('question', '')
	    trajectory = data.get('trajectory') or ''

	    def find_steps(label, next_label):
	        # The pipe must be unescaped: it is the alternation between "the next
	        # label" and "end of string". Written as `\|` it would demand a literal
	        # "|" character and nothing would ever match.
	        pattern = rf'{label} \d+: (.+?)(?={next_label}|\Z)'
	        # DOTALL lets a single step span several lines.
	        return [step.strip() for step in re.findall(pattern, trajectory, re.DOTALL)]

	    return {
	        'question': question,
	        'thoughts': find_steps('Thought', 'Action'),
	        'actions': find_steps('Action', 'Observation'),
	        'observations': find_steps('Observation', 'Thought'),
	    }
	# Sample usage: parse the first example of the training split.
	# NOTE(review): `ds` is not defined in this snippet; presumably the already-loaded source dataset — confirm.
	extracted_info = extract_trajectory_info(ds["train"][0])
	```
	The parsed examples were then rebuilt into a new dataset with:
	```python
	# Instruction header placed at the top of every generated sample.
	preamble = """Tools available:
	(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
	(2) Lookup[keyword], which returns the next sentence containing the keyword in the current passage.
	(3) Finish[answer], which returns the answer and finishes the task.
	"""
	dataset = []
	# Walk every example in the training split.
	for i in range(len(ds['train'])):
	    extracted_info = extract_trajectory_info(ds['train'][i])
	    thoughts = extracted_info['thoughts']
	    actions = extracted_info['actions']
	    observations = extracted_info['observations']

	    # Emit one sample per thought: the question, the earlier steps as context,
	    # and the current Thought/Action pair as the target.
	    # NOTE: actions/observations are indexed by thought position, so an example
	    # with fewer of them than thoughts raises IndexError.
	    for j in range(len(thoughts)):
	        out = f"{preamble}---\nQuestion: {extracted_info['question']}\n"
	        if j == 0:
	            # First step: there is no history, so no second "---" separator.
	            out += f"Thought: {thoughts[0]}\n"
	            out += f"Action: {actions[0]}\nPAUSE\n\n\n\n"
	        else:
	            prev = ""
	            # History is listed newest-first (step j-1 down to step 0).
	            for step in reversed(range(j)):
	                prev += f"Thought:{thoughts[step]}\n"
	                prev += f"Action: {actions[step]}\nPAUSE\n"
	                prev += f"Observation: {observations[step]}\n"

	            out += prev
	            out += f"---\nThought: {thoughts[j]}\n"
	            out += f"Action: {actions[j]}\nPAUSE\n\n\n\n"

	        # Show the constructed sample and keep it.
	        print(out)
	        dataset.append(out)
	```