# project_charles/environment_state_actor.py
import ray
from datetime import datetime
from agent_state_actor import AgentState


class EnvironmentState:
    """Snapshot of the environment for a single (episode, step) pair."""

    def __init__(self, episode, step):
        self.agent_state = None
        self.timestamp = datetime.utcnow()
        self.episode = episode
        self.step = step
        self.reward = 0
        self.llm_preview = ''
        self.llm_responses = []
        self.tts_raw_chunk_ids = []

    def __str__(self):
        state = ', '.join(
            f'{k}={v}' for k, v in self.__dict__.items()
            if k not in {'episode', 'step', 'timestamp', 'reward'}
        )
        return (
            f'episode={self.episode}, step={self.step}, timestamp={self.timestamp}, '
            f'\nreward={self.reward}\nstate=({state})'
        )


@ray.remote
class EnvironmentStateActor:
    """Ray actor that owns the mutable environment state across episodes and steps."""

    def __init__(self):
        self.episode = 0
        self.step = 0
        self.state = None
        self.reset_episode()

    def reset_episode(self) -> EnvironmentState:
        """Start a new episode at step 0 and return its initial state."""
        self.episode += 1
        self.step = 0
        self.state = EnvironmentState(self.episode, self.step)
        return self.state

    def begin_next_step(self) -> EnvironmentState:
        """Advance to the next step and return the completed previous state."""
        previous_state = self.state
        self.step += 1
        self.state = EnvironmentState(self.episode, self.step)
        return previous_state

    def add_reward(self, reward):
        self.state.reward += reward

    def set_llm_preview(self, llm_preview):
        self.state.llm_preview = llm_preview

    def add_llm_response_and_clear_llm_preview(self, llm_response):
        self.state.llm_responses.append(llm_response)
        self.state.llm_preview = ''

    def add_tts_raw_chunk_id(self, chunk_id):
        self.state.tts_raw_chunk_ids.append(chunk_id)

    def add_agent_state(self, agent_state: AgentState):
        self.state.agent_state = agent_state

    def get_state(self) -> EnvironmentState:
        return self.state
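

# Usage sketch (illustrative, not part of the original file): drives the actor
# through one step of an episode to show the MDP-style flow. Assumes a local
# Ray runtime and that the sibling agent_state_actor module imported above is
# importable.
if __name__ == '__main__':
    ray.init()
    env_actor = EnvironmentStateActor.remote()

    # Accumulate signals for the current step.
    ray.get(env_actor.add_reward.remote(1.0))
    ray.get(env_actor.set_llm_preview.remote('Hel'))
    ray.get(env_actor.add_llm_response_and_clear_llm_preview.remote('Hello.'))

    # Close out the step; the completed previous state comes back for logging.
    finished_step = ray.get(env_actor.begin_next_step.remote())
    print(finished_step)

    ray.shutdown()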