import argparse
import json
import os
import pickle
import random  # used by the "random" baseline in generate()
import re      # used to match completion-model names in generate()
import subprocess
import time
import warnings
from datetime import datetime
from pathlib import Path

import numpy as np
import requests
import torch
from imageio import mimsave
from matplotlib import pyplot as plt

from n_tokens import estimate_price
# from utils.babyai_utils.baby_agent import load_agent
from utils import *
from textworld_utils.utils import generate_text_obs
from models import *
from gym_minigrid.wrappers import *
from gym_minigrid.window import Window

def new_episode_marker():
    return "New episode.\n"


def success_marker():
    return "Success!\n"


def failure_marker():
    return "Failure!\n"


def action_query():
    return "Act :"

def get_parsed_action(text_action):
    """
    Parses the text generated by a model and extracts the action.
    """
    if "move forward" in text_action:
        return "move forward"
    elif "done" in text_action:
        return "done"
    elif "turn left" in text_action:
        return "turn left"
    elif "turn right" in text_action:
        return "turn right"
    elif "toggle" in text_action:
        return "toggle"
    elif "no_op" in text_action:
        return "no_op"
    else:
        warnings.warn(f"Undefined action {text_action}")
        return "no_op"

def action_to_prompt_action_text(action):
    if np.allclose(action, [int(env.actions.forward), np.nan, np.nan], equal_nan=True):
        # 2
        text_action = "move forward"
    elif np.allclose(action, [int(env.actions.left), np.nan, np.nan], equal_nan=True):
        # 0
        text_action = "turn left"
    elif np.allclose(action, [int(env.actions.right), np.nan, np.nan], equal_nan=True):
        # 1
        text_action = "turn right"
    elif np.allclose(action, [int(env.actions.toggle), np.nan, np.nan], equal_nan=True):
        # 3
        text_action = "toggle"
    elif np.allclose(action, [int(env.actions.done), np.nan, np.nan], equal_nan=True):
        # 4
        text_action = "done"
    elif np.allclose(action, [np.nan, np.nan, np.nan], equal_nan=True):
        text_action = "no_op"
    else:
        warnings.warn(f"Undefined action {action}")
        return "no_op"

    return f"{action_query()} {text_action}\n"

def text_action_to_action(text_action):
    # text_action = get_parsed_action(text_action)
    if "move forward" == text_action:
        action = [int(env.actions.forward), np.nan, np.nan]
    elif "turn left" == text_action:
        action = [int(env.actions.left), np.nan, np.nan]
    elif "turn right" == text_action:
        action = [int(env.actions.right), np.nan, np.nan]
    elif "toggle" == text_action:
        action = [int(env.actions.toggle), np.nan, np.nan]
    elif "done" == text_action:
        action = [int(env.actions.done), np.nan, np.nan]
    elif "no_op" == text_action:
        action = [np.nan, np.nan, np.nan]
    else:
        # fall back to no_op instead of raising UnboundLocalError on unknown input
        warnings.warn(f"Undefined text action {text_action}")
        action = [np.nan, np.nan, np.nan]
    return action

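# Note: text_action_to_action and action_to_prompt_action_text are inverses of
# each other; the main loop below relies on this round trip via
#   assert action_to_prompt_action_text(action) == f"{action_query()} {text_action}\n"
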
def prompt_preprocessor(llm_prompt):
    # remove peer observations
    lines = llm_prompt.split("\n")
    new_lines = []
    for line in lines:
        if line.startswith("#"):
            continue
        elif line.startswith("Conversation"):
            continue
        elif "peer" in line:
            caretaker = True
            if caretaker:
                # show only the location of the caretaker
                # this is very ugly, todo: refactor this
                assert "there is a" in line
                start_index = line.index('there is a') + 11
                new_line = line[:start_index] + 'caretaker'
                new_lines.append(new_line)
            else:
                # no caretaker at all
                if line.startswith("Obs :") and "peer" in line:
                    # remove only the peer descriptions
                    line = "Obs :"
                    new_lines.append(line)
                else:
                    assert "peer" in line
        elif "Caretaker:" in line:
            line = line.replace("Caretaker:", "Caretaker says: ")
            new_lines.append(line)
        else:
            new_lines.append(line)

    return "\n".join(new_lines)

# def generate_text_obs(obs, info):
#
#     text_observation = obs_to_text(info)
#
#     llm_prompt = "Obs : "
#     llm_prompt += "".join(text_observation)
#
#     # add utterances
#     if obs["utterance_history"] != "Conversation: \n":
#         utt_hist = obs['utterance_history']
#         utt_hist = utt_hist.replace("Conversation: \n", "")
#         llm_prompt += utt_hist
#
#     return llm_prompt


# def obs_to_text(info):
#     image, vis_mask = info["image"], info["vis_mask"]
#     carrying = info["carrying"]
#     agent_pos_vx, agent_pos_vy = info["agent_pos_vx"], info["agent_pos_vy"]
#     npc_actions_dict = info["npc_actions_dict"]
#
#     # (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state)
#     # State, 0: open, 1: closed, 2: locked
#     IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
#     IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))
#
#     list_textual_descriptions = []
#
#     if carrying is not None:
#         list_textual_descriptions.append("You carry a {} {}".format(carrying.color, carrying.type))
#
#     # agent_pos_vx, agent_pos_vy = self.get_view_coords(self.agent_pos[0], self.agent_pos[1])
#
#     view_field_dictionary = dict()
#
#     for i in range(image.shape[0]):
#         for j in range(image.shape[1]):
#             if image[i][j][0] != 0 and image[i][j][0] != 1 and image[i][j][0] != 2:
#                 if i not in view_field_dictionary.keys():
#                     view_field_dictionary[i] = dict()
#                     view_field_dictionary[i][j] = image[i][j]
#                 else:
#                     view_field_dictionary[i][j] = image[i][j]
#
#     # Find the wall if any
#     # We describe a wall only if there are no objects between the agent and the wall in a straight line
#
#     # Find wall in front
#     add_wall_descr = False
#     if add_wall_descr:
#         j = agent_pos_vy - 1
#         object_seen = False
#         while j >= 0 and not object_seen:
#             if image[agent_pos_vx][j][0] != 0 and image[agent_pos_vx][j][0] != 1:
#                 if image[agent_pos_vx][j][0] == 2:
#                     list_textual_descriptions.append(
#                         f"A wall is {agent_pos_vy - j} steps in front of you. \n")  # forward
#                     object_seen = True
#                 else:
#                     object_seen = True
#             j -= 1
#         # Find wall left
#         i = agent_pos_vx - 1
#         object_seen = False
#         while i >= 0 and not object_seen:
#             if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
#                 if image[i][agent_pos_vy][0] == 2:
#                     list_textual_descriptions.append(
#                         f"A wall is {agent_pos_vx - i} steps to the left. \n")  # left
#                     object_seen = True
#                 else:
#                     object_seen = True
#             i -= 1
#         # Find wall right
#         i = agent_pos_vx + 1
#         object_seen = False
#         while i < image.shape[0] and not object_seen:
#             if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
#                 if image[i][agent_pos_vy][0] == 2:
#                     list_textual_descriptions.append(
#                         f"A wall is {i - agent_pos_vx} steps to the right. \n")  # right
#                     object_seen = True
#                 else:
#                     object_seen = True
#             i += 1
#
#     # list_textual_descriptions.append("You see the following objects: ")
#     # returns the position of seen objects relative to you
#     for i in view_field_dictionary.keys():
#         for j in view_field_dictionary[i].keys():
#             if i != agent_pos_vx or j != agent_pos_vy:
#                 object = view_field_dictionary[i][j]
#
#                 # # don't show npc
#                 # if IDX_TO_OBJECT[object[0]] == "npc":
#                 #     continue
#
#                 front_dist = agent_pos_vy - j
#                 left_right_dist = i - agent_pos_vx
#
#                 loc_descr = ""
#                 if front_dist == 1 and left_right_dist == 0:
#                     loc_descr += "Right in front of you "
#
#                 elif left_right_dist == 1 and front_dist == 0:
#                     loc_descr += "Just to the right of you"
#
#                 elif left_right_dist == -1 and front_dist == 0:
#                     loc_descr += "Just to the left of you"
#
#                 else:
#                     front_str = str(front_dist) + " steps in front of you " if front_dist > 0 else ""
#
#                     loc_descr += front_str
#
#                     suff = "s" if abs(left_right_dist) > 0 else ""
#                     and_ = "and" if loc_descr != "" else ""
#
#                     if left_right_dist < 0:
#                         left_right_str = f"{and_} {-left_right_dist} step{suff} to the left"
#                         loc_descr += left_right_str
#
#                     elif left_right_dist > 0:
#                         left_right_str = f"{and_} {left_right_dist} step{suff} to the right"
#                         loc_descr += left_right_str
#
#                     else:
#                         left_right_str = ""
#                         loc_descr += left_right_str
#
#                 loc_descr += f" there is a "
#
#                 obj_type = IDX_TO_OBJECT[object[0]]
#                 if obj_type == "npc":
#                     IDX_TO_STATE = {0: 'friendly', 1: 'antagonistic'}
#
#                     description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} peer. "
#
#                     # gaze
#                     gaze_dir = {
#                         0: "towards you",
#                         1: "to the left of you",
#                         2: "in the same direction as you",
#                         3: "to the right of you",
#                     }
#                     description += f"It is looking {gaze_dir[object[3]]}. "
#
#                     # point
#                     point_dir = {
#                         0: "towards you",
#                         1: "to the left of you",
#                         2: "in the same direction as you",
#                         3: "to the right of you",
#                     }
#
#                     if object[4] != 255:
#                         description += f"It is pointing {point_dir[object[4]]}. "
#
#                     # last action
#                     last_action = {v: k for k, v in npc_actions_dict.items()}[object[5]]
#
#                     last_action = {
#                         "go_forward": "forward",
#                         "rotate_left": "turn left",
#                         "rotate_right": "turn right",
#                         "toggle_action": "toggle",
#                         "point_stop_point": "stop pointing",
#                         "point_E": "",
#                         "point_S": "",
#                         "point_W": "",
#                         "point_N": "",
#                         "stop_point": "stop pointing",
#                         "no_op": ""
#                     }[last_action]
#
#                     if last_action not in ["no_op", ""]:
#                         description += f"Its last action is {last_action}. "
#
#                 elif obj_type in ["switch", "apple", "generatorplatform", "marble", "marbletee", "fence"]:
#                     # todo: this assumes that Switch.no_light == True
#                     description = f"{IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
#                     assert object[2:].mean() == 0
#
#                 elif obj_type == "lockablebox":
#                     IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'}
#                     description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
#                     assert object[3:].mean() == 0
#
#                 elif obj_type == "applegenerator":
#                     IDX_TO_STATE = {1: 'square', 2: 'round'}
#                     description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
#                     assert object[3:].mean() == 0
#
#                 elif obj_type == "remotedoor":
#                     IDX_TO_STATE = {0: 'open', 1: 'closed'}
#                     description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
#                     assert object[3:].mean() == 0
#
#                 elif obj_type == "door":
#                     IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'}
#                     description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
#                     assert object[3:].mean() == 0
#
#                 elif obj_type == "lever":
#                     IDX_TO_STATE = {1: 'activated', 0: 'unactivated'}
#                     if object[3] == 255:
#                         countdown_txt = ""
#                     else:
#                         countdown_txt = f"with {object[3]} timesteps left. "
#
#                     description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} {countdown_txt}"
#
#                     assert object[4:].mean() == 0
#                 else:
#                     raise ValueError(f"Undefined object type {obj_type}")
#
#                 full_destr = loc_descr + description + "\n"
#
#                 list_textual_descriptions.append(full_destr)
#
#     if len(list_textual_descriptions) == 0:
#         list_textual_descriptions.append("\n")
#
#     return list_textual_descriptions

def plt_2_rgb(env):
    # convert the rendered matplotlib canvas to an (H, W, 3) uint8 RGB array
    width, height = env.window.fig.get_size_inches() * env.window.fig.get_dpi()
    data = np.frombuffer(env.window.fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(int(height), int(width), 3)
    return data

def reset(env):
    env.reset()
    # a dirty trick just to get obs and info
    return env.step([np.nan, np.nan, np.nan])
    # return step("no_op")

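# The all-NaN action is the "no_op" by convention in this codebase (see
# text_action_to_action), so the extra step after reset performs no agent
# action and just yields the initial (obs, reward, done, info) tuple.
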
def generate(text_input, model):
    # return "(a) move forward"
    if model == "dummy":
        print("dummy action forward")
        return "move forward"

    elif model == "interactive":
        return input("Enter action:")

    elif model == "random":
        print("random agent")
        print("PROMPT:")
        print(text_input)
        return random.choice([
            "move forward",
            "turn left",
            "turn right",
            "toggle",
        ])

    elif model in ["gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-4-0613", "gpt-4-0314"]:
        while True:
            try:
                c = openai.ChatCompletion.create(
                    model=model,
                    messages=[
                        # {"role": "system", "content": ""},
                        # {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
                        # {"role": "user", "content": "Continue the following text in the most logical way.\n"+text_input}
                        # {"role": "system", "content":
                        #     "You are an agent and can use the following actions: 'move forward', 'toggle', 'turn left', 'turn right', 'done'."
                        #     # "The caretaker will say the color of the box which you should open. Turn until you find this box and toggle it when it is right in front of you."
                        #     # "Then an apple will appear and you can toggle it to succeed."
                        # },
                        {"role": "user", "content": text_input}
                    ],
                    max_tokens=3,
                    n=1,
                    temperature=0.0,
                    request_timeout=30,
                )
                break
            except Exception as e:
                print(e)
                print("Pausing")
                time.sleep(10)
                continue

        print("->LLM generation: ", c['choices'][0]['message']['content'])
        return c['choices'][0]['message']['content']

    elif re.match(r"text-.*-\d{3}", model) or model in ["gpt-3.5-turbo-instruct-0914"]:
        while True:
            try:
                response = openai.Completion.create(
                    model=model,
                    prompt=text_input,
                    # temperature=0.7,
                    temperature=0.0,
                    max_tokens=3,
                    top_p=1,
                    frequency_penalty=0,
                    presence_penalty=0,
                    timeout=30
                )
                break
            except Exception as e:
                print(e)
                print("Pausing")
                time.sleep(10)
                continue

        choices = response["choices"]
        assert len(choices) == 1
        return choices[0]["text"].strip().lower()  # remove newline from the end

    elif model in ["gpt2_large", "api_bloom"]:
        # HF_TOKEN = os.getenv("HF_TOKEN")
        if model == "gpt2_large":
            API_URL = "https://api-inference.huggingface.co/models/gpt2-large"
        elif model == "api_bloom":
            API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
        else:
            raise ValueError(f"Undefined model {model}.")

        headers = {"Authorization": f"Bearer {HF_TOKEN}"}

        def query(text_prompt, n_tokens=3):
            input = text_prompt

            # make n_tokens requests and append the output each time - one request generates one token
            for _ in range(n_tokens):
                # prepare request
                payload = {
                    "inputs": input,
                    "parameters": {
                        "do_sample": False,
                        'temperature': 0,
                        'wait_for_model': True,
                        # "max_length": 500,  # for gpt2
                        # "max_new_tokens": 250  # for gpt2-xl
                    },
                }
                data = json.dumps(payload)

                # request
                response = requests.request("POST", API_URL, headers=headers, data=data)
                response_json = json.loads(response.content.decode("utf-8"))

                if type(response_json) is list and len(response_json) == 1:
                    # generated_text contains the input + the response
                    response_full_text = response_json[0]['generated_text']
                    # we use this as the next input
                    input = response_full_text
                else:
                    print("Invalid request to huggingface api")
                    from IPython import embed; embed()

            # remove the prompt from the beginning
            assert response_full_text.startswith(text_prompt)
            response_text = response_full_text[len(text_prompt):]

            return response_text

        response = query(text_input).strip().lower()
        return response

    elif model in ["bloom_560m"]:
        # from transformers import BloomForCausalLM
        # from transformers import BloomTokenizerFast
        #
        # tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m", cache_dir=".cache/huggingface/")
        # model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m", cache_dir=".cache/huggingface/")

        inputs = hf_tokenizer(text_input, return_tensors="pt")
        # 3 words
        result_length = inputs['input_ids'].shape[-1] + 3
        full_output = hf_tokenizer.decode(hf_model.generate(inputs["input_ids"], max_length=result_length)[0])
        assert full_output.startswith(text_input)
        response = full_output[len(text_input):]
        response = response.strip().lower()
        return response

    else:
        raise ValueError("Unknown model.")

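# Minimal usage sketch (the "dummy", "random", and "interactive" backends need
# no API keys; the OpenAI backends assume OPENAI_API_KEY is set):
#   generate("New episode.\nObs : ...\nAct :", "dummy")   # -> "move forward"
#   generate(llm_prompt, "gpt-3.5-turbo-0613")            # queries the chat API
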
def estimate_tokens_selenium(prompt):
    # Selenium is used because tiktoken requires python3.9, which is not available here
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Initialize a headless Chrome WebDriver instance
    options = webdriver.ChromeOptions()
    options.add_argument('headless')

    # set up the driver
    driver = webdriver.Chrome(options=options)

    # Navigate to the website
    driver.get('https://platform.openai.com/tokenizer')

    text_input = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div/div[2]/div[3]/textarea')
    text_input.clear()
    text_input.send_keys(prompt)

    n_tokens = 0
    while n_tokens == 0:
        time.sleep(1)
        # Wait for the response to be loaded
        wait = WebDriverWait(driver, 10)
        response = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.tokenizer-stat:nth-child(1) > div:nth-child(2)')))
        n_tokens = int(response.text.replace(",", ""))

    # Close the WebDriver instance
    driver.quit()

    return n_tokens

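# Hedged alternative: on python3.9+ the same count can be obtained locally with
# tiktoken, without a browser (assumes the `tiktoken` package is installed):
#
#   import tiktoken
#   enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
#   n_tokens = len(enc.encode(prompt))
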
def load_in_context_examples(in_context_episodes):
    in_context_examples = ""

    print(f'Loading {len(in_context_episodes)} examples.')
    for episode_data in in_context_episodes:
        in_context_examples += new_episode_marker()
        for step_i, step_data in enumerate(episode_data):
            action = step_data["action"]
            info = step_data["info"]
            obs = step_data["obs"]
            reward = step_data["reward"]
            done = step_data["done"]

            if step_i == 0:
                # step 0 is the initial state of the environment
                assert action is None
                prompt_action_text = ""
            else:
                prompt_action_text = action_to_prompt_action_text(action)

            text_obs = generate_text_obs(obs, info)
            step_text = prompt_preprocessor(prompt_action_text + text_obs)

            in_context_examples += step_text

            if done:
                if reward > 0:
                    in_context_examples += success_marker()
                else:
                    in_context_examples += failure_marker()
            else:
                # in all envs the reward is given at the end
                # in the initial step the reward is None
                assert reward == 0 or (step_i == 0 and reward is None)

    print("-------------------------- IN CONTEXT EXAMPLES --------------------------")
    print(in_context_examples)
    print("-------------------------------------------------------------------------")

    return in_context_examples

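# Expected structure of the pickled in-context data (inferred from the loop
# above): a list of episodes, each a list of step dicts of the form
#   {"action": ..., "info": ..., "obs": ..., "reward": ..., "done": ...}
# where step 0 carries the initial state (action is None, reward is None).
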
if __name__ == "__main__":
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", required=False,
                        help='model to use (e.g. "text-ada-001")')
    parser.add_argument("--seed", type=int, default=0,
                        help="Seed of the first episode. The seeds for the following episodes will be used in order: seed, seed + 1, ... seed + (n_episodes-1) (default: 0)")
    parser.add_argument("--max-steps", type=int, default=15,
                        help="maximum number of steps per episode")
    parser.add_argument("--shift", type=int, default=0,
                        help="number of times the environment is reset at the beginning (default: 0)")
    parser.add_argument("--argmax", action="store_true", default=False,
                        help="select the action with highest probability (default: False)")
    parser.add_argument("--pause", type=float, default=0.5,
                        help="pause duration between two consecutive actions of the agent (default: 0.5)")
    parser.add_argument("--env-name", type=str,
                        default="SocialAI-AsocialBoxInformationSeekingParamEnv-v1",
                        # default="SocialAI-ColorBoxesLLMCSParamEnv-v1",
                        required=False,
                        help="env name")
    parser.add_argument("--in-context-path", type=str,
                        # old
                        # default='llm_data/in_context_asocial_box.txt'
                        # default='llm_data/in_context_color_boxes.txt',
                        # new
                        # asocial box
                        default='llm_data/in_context_examples/in_context_asocialbox_SocialAI-AsocialBoxInformationSeekingParamEnv-v1_2023_07_19_19_28_48/episodes.pkl',
                        # colorbox
                        # default='llm_data/in_context_examples/in_context_colorbox_SocialAI-ColorBoxesLLMCSParamEnv-v1_2023_07_20_13_11_54/episodes.pkl',
                        required=False,
                        help="path to in-context examples")
    parser.add_argument("--episodes", type=int, default=10,
                        help="number of episodes to visualize")
    parser.add_argument("--env-args", nargs='*', default=None)
    parser.add_argument("--agent_view", default=False, help="draw what the agent sees (partially observable view)", action='store_true')
    parser.add_argument("--tile_size", type=int, help="size at which to render tiles", default=32)
    parser.add_argument("--mask-unobserved", default=False, help="mask cells that are not observed by the agent", action='store_true')
    parser.add_argument("--log", type=str, default="llm_log/episodes_log", help="log from the run", required=False)
    parser.add_argument("--feed-full-ep", default=False, help="whether to append the whole episode to the prompt", action='store_true')
    parser.add_argument("--last-n", type=int, help="how many of the last steps to provide in the observation (if not feed-full-ep)", default=3)
    parser.add_argument("--skip-check", default=False, help="Don't estimate the price.", action="store_true")

    args = parser.parse_args()

    # Set seed for all randomness sources
    seed(args.seed)

    model = args.model

    in_context_examples_path = args.in_context_path

    # test for paper: remove later
    if "asocialbox" in in_context_examples_path:
        assert args.env_name == "SocialAI-AsocialBoxInformationSeekingParamEnv-v1"
    elif "colorbox" in in_context_examples_path:
        assert args.env_name == "SocialAI-ColorBoxesLLMCSParamEnv-v1"

    print("env name:", args.env_name)
    print("examples:", in_context_examples_path)
    print("model:", args.model)

    # datetime
    now = datetime.now()
    datetime_string = now.strftime("%d_%m_%Y_%H:%M:%S")
    print(datetime_string)

    # log filenames
    log_folder = args.log + "_" + datetime_string + "/"
    os.mkdir(log_folder)
    evaluation_log_filename = log_folder + "evaluation_log.json"
    prompt_log_filename = log_folder + "prompt_log.txt"
    ep_h_log_filename = log_folder + "episode_history_query.txt"
    gif_savename = log_folder + "demo.gif"

    env_args = env_args_str_to_dict(args.env_args)
    env = make_env(args.env_name, args.seed, env_args)
    # env = gym.make(args.env_name, **env_args)
    print(f"Environment {args.env_name} and args: {env_args_str_to_dict(args.env_args)}\n")

    # Define agent
    print("Agent loaded\n")

    # prepare models
    model_instance = None

    if "text" in args.model or "gpt-3" in args.model or "gpt-4" in args.model:
        import openai
        openai.api_key = os.getenv("OPENAI_API_KEY")

    elif args.model in ["gpt2_large", "api_bloom"]:
        HF_TOKEN = os.getenv("HF_TOKEN")

    elif args.model in ["bloom_560m"]:
        from transformers import BloomForCausalLM
        from transformers import BloomTokenizerFast
        hf_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m", cache_dir=".cache/huggingface/")
        hf_model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m", cache_dir=".cache/huggingface/")

    elif args.model in ["bloom"]:
        from transformers import BloomForCausalLM
        from transformers import BloomTokenizerFast
        hf_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom", cache_dir=".cache/huggingface/")
        hf_model = BloomForCausalLM.from_pretrained("bigscience/bloom", cache_dir=".cache/huggingface/")
        model_instance = (hf_tokenizer, hf_model)

    with open(in_context_examples_path, "rb") as f:
        in_context_episodes = pickle.load(f)
    in_context_examples = load_in_context_examples(in_context_episodes)

    with open(prompt_log_filename, "a+") as f:
        f.write(datetime_string)

    with open(ep_h_log_filename, "a+") as f:
        f.write(datetime_string)

    full_episode_history = args.feed_full_ep
    last_n = args.last_n

    if full_episode_history:
        print("Full episode history.")
    else:
        print(f"Last {args.last_n} steps.")

    if not args.skip_check and args.model not in ["dummy", "random", "interactive"]:
        print(f"Estimating price for model {args.model}.")
        in_context_n_tokens = estimate_tokens_selenium(in_context_examples)
        n_in_context_steps = sum([len(ep) for ep in in_context_episodes])
        tokens_per_step = in_context_n_tokens / n_in_context_steps

        _, price = estimate_price(
            num_of_episodes=args.episodes,
            in_context_len=in_context_n_tokens,
            tokens_per_step=tokens_per_step,
            n_steps=args.max_steps,
            last_n=last_n,
            model=args.model,
            feed_episode_history=full_episode_history
        )
        input(f"You will spend: {price} dollars. ok?")

    # prepare frames list to save to gif
    frames = []

    assert args.max_steps <= 20

    success_rates = []

    # episodes start
    for episode in range(args.episodes):
        print("Episode:", episode)
        episode_history_text = new_episode_marker()

        success = False
        episode_seed = args.seed + episode
        env = make_env(args.env_name, episode_seed, env_args)

        with open(prompt_log_filename, "a+") as f:
            f.write("\n\n")

        observations = []
        actions = []
        for i in range(int(args.max_steps)):
            if i == 0:
                obs, reward, done, info = reset(env)
                prompt_action_text = ""
            else:
                with open(prompt_log_filename, "a+") as f:
                    f.write("\nnew prompt: -----------------------------------\n")
                    f.write(llm_prompt)

                # query the model
                generation = generate(llm_prompt, args.model)

                # parse the action
                text_action = get_parsed_action(generation)

                # get the raw action
                action = text_action_to_action(text_action)

                # execute the action
                obs, reward, done, info = env.step(action)

                prompt_action_text = f"{action_query()} {text_action}\n"
                assert action_to_prompt_action_text(action) == prompt_action_text
                actions.append(prompt_action_text)

            text_obs = generate_text_obs(obs, info)
            observations.append(text_obs)
            step_text = prompt_preprocessor(prompt_action_text + text_obs)
            print("Step text:")
            print(step_text)

            episode_history_text += step_text  # append to the history of this episode

            if full_episode_history:
                # feed the full episode history
                llm_prompt = in_context_examples + episode_history_text + action_query()
            else:
                n = min(last_n, len(observations))
                obs = observations[-n:]
                act = (actions + [action_query()])[-n:]
                episode_text = "".join([o + a for o, a in zip(obs, act)])

                llm_prompt = in_context_examples + new_episode_marker() + episode_text

            llm_prompt = prompt_preprocessor(llm_prompt)

            # save the image
            env.render(mode="human")
            rgb_img = plt_2_rgb(env)
            frames.append(rgb_img)

            if env.current_env.box.blocked and not env.current_env.box.is_open:
                # the target box is blocked -> the apple can't be obtained
                # break to save compute
                break

            if done:
                # quadruple the last frame to pause between episodes
                for i in range(3):
                    same_img = np.copy(rgb_img)
                    # toggle a pixel between frames to avoid cropping when going from gif to mp4
                    same_img[0, 0, 2] = 0 if (i % 2) == 0 else 255
                    frames.append(same_img)

                if reward > 0:
                    print("Success!")
                    episode_history_text += success_marker()
                    success = True
                else:
                    episode_history_text += failure_marker()

                with open(ep_h_log_filename, "a+") as f:
                    f.write("\nnew prompt: -----------------------------------\n")
                    f.write(episode_history_text)

                break
            else:
                with open(ep_h_log_filename, "a+") as f:
                    f.write("\nnew prompt: -----------------------------------\n")
                    f.write(episode_history_text)

        print(f"{'Success' if success else 'Failure'}")
        success_rates.append(success)

    mean_success_rate = np.mean(success_rates)
    print("Success rate:", mean_success_rate)

    print(f"Saving gif to {gif_savename}.")
    mimsave(gif_savename, frames, duration=args.pause)
    print("Done.")

    log_data_dict = vars(args)
    log_data_dict["success_rates"] = success_rates
    log_data_dict["mean_success_rate"] = mean_success_rate

    print("Evaluation log: ", evaluation_log_filename)
    with open(evaluation_log_filename, "w") as f:
        f.write(json.dumps(log_data_dict))
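
# Illustrative shape of the resulting evaluation_log.json (hypothetical values;
# the dict holds all argparse args plus the two result fields):
#   {"model": "text-ada-001", "seed": 0, ...,
#    "success_rates": [true, false, ...], "mean_success_rate": 0.5}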