Create README.md
#1
by
Corianas
- opened
README.md
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
---
|
6 |
+
This is a ReAct-style model.
|
7 |
+
|
8 |
+
The dataset was parsed with:
|
9 |
+
```
|
10 |
+
def extract_trajectory_info(data):
    """
    Extracts the question, thoughts, actions, and observations from the trajectory field of the data.

    The trajectory is read as one string of numbered steps, e.g.
    "Thought 1: ... Action 1: ... Observation 1: ... Thought 2: ...".
    Each step's text runs until the next numbered marker of the following
    kind (or the end of the string), so the bare words "Thought", "Action"
    or "Observation" appearing inside a step's text do not cut it short.

    Parameters:
    data (dict): The data entry containing the 'question' and 'trajectory'
        fields. A missing (or None) trajectory yields empty lists.

    Returns:
    dict: A dictionary with the keys 'question' (str) and 'thoughts',
        'actions', 'observations' (lists of whitespace-stripped strings,
        in order of appearance).
    """
    # Local import so the snippet is runnable when copied on its own.
    import re

    question = data.get('question', '')
    # Read the trajectory once; `or ''` guards against an explicit None value.
    trajectory = data.get('trajectory', '') or ''

    def _find_steps(label, next_label):
        # Lazy match up to the next *numbered* marker ("<next_label> N:") or
        # end of text; DOTALL lets a step span multiple lines.
        pattern = rf'{label} \d+: (.+?)(?={next_label} \d+:|\Z)'
        return [step.strip() for step in re.findall(pattern, trajectory, re.DOTALL)]

    return {
        'question': question,
        'thoughts': _find_steps('Thought', 'Action'),
        'actions': _find_steps('Action', 'Observation'),
        'observations': _find_steps('Observation', 'Thought'),
    }
|
39 |
+
# Sample data
|
40 |
+
extracted_info = extract_trajectory_info(ds["train"][0])  # parses entry 0 of ds["train"]; `ds` is not defined in this snippet - presumably the source dataset loaded beforehand (TODO confirm)
|
41 |
+
```
|
42 |
+
The parsed examples were then rebuilt into a new dataset with:
|
43 |
+
```
|
44 |
+
# Tool instructions placed at the very top of every generated prompt.
preamble = """Tools available:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing the keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
"""
dataset = []
# Visit every entry of ds['train'] and emit one prompt per thought step.
# `ds` is not defined in this snippet; it must already be in scope.
for i in range(len(ds['train'])):
    extracted_info = extract_trajectory_info(ds['train'][i])
    thoughts = extracted_info['thoughts']
    actions = extracted_info['actions']
    observations = extracted_info['observations']

    for step, thought in enumerate(thoughts):
        # Every prompt starts with the tool preamble and the question.
        out = f"{preamble}---\nQuestion: {extracted_info['question']}\n"

        if step > 0:
            # Replay the earlier steps, most recent first (step - 1 down to 0).
            # NOTE(review): the history is in reverse chronological order and
            # its "Thought:" prefix has no space after the colon, unlike the
            # current-step prefix below. Both are kept as-is because they
            # define the emitted prompt format - confirm whether intended.
            out += "".join(
                f"Thought:{thoughts[past]}\n"
                f"Action: {actions[past]}\nPAUSE\n"
                f"Observation: {observations[past]}\n"
                for past in reversed(range(step))
            )
            # Separator between the replayed history and the current step.
            out += "---\n"

        # The current step: its thought and action, ending at PAUSE.
        out += f"Thought: {thought}\n"
        out += f"Action: {actions[step]}\nPAUSE\n\n\n\n"

        # Show the prompt and collect it.
        print(out)
        dataset.append(out)
|
80 |
+
```
|