from twisted.internet import reactor, defer, task

from agent import AutonomousWebAgent

import random
import logging
import sys
import time
import codecs
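
# Mirror log output to a UTF-8 file and to stdout; wrapping sys.stdout.buffer
# in a UTF-8 writer avoids UnicodeEncodeError on consoles whose default
# encoding is not UTF-8 (common on Windows).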
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    handlers=[
                        logging.FileHandler("agent_training.log", encoding='utf-8'),
                        logging.StreamHandler(codecs.getwriter('utf-8')(sys.stdout.buffer))
                    ])

logger = logging.getLogger(__name__)
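
# Search topics; each training episode draws one at random.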
QUERIES = [
    "machine learning", "climate change", "renewable energy", "artificial intelligence",
    "quantum computing", "blockchain technology", "gene editing", "virtual reality",
    "space exploration", "cybersecurity", "autonomous vehicles", "Internet of Things",
    "3D printing", "nanotechnology", "bioinformatics", "augmented reality", "robotics",
    "data science", "neural networks", "cloud computing", "edge computing", "5G technology",
    "cryptocurrency", "natural language processing", "computer vision"
]


@defer.inlineCallbacks
def train_agent():
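    # Dimensions of the state and action spaces, and the number of high-level
    # options available to the agent's manager/worker hierarchy.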
    state_size = 7
    action_size = 3
    num_options = 3
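
    # Epsilon-greedy exploration starts fully random (epsilon=1.0) and decays
    # by a factor of 0.995 per step toward a floor of 0.01; gamma discounts
    # future rewards.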
    agent = AutonomousWebAgent(
        state_size=state_size,
        action_size=action_size,
        num_options=num_options,
        hidden_size=64,
        learning_rate=0.001,
        gamma=0.99,
        epsilon=1.0,
        epsilon_decay=0.995,
        epsilon_min=0.01,
        knowledge_base_path='knowledge_base.json'
    )
    logger.info(f"Initialized AutonomousWebAgent with state_size={state_size}, action_size={action_size}, num_options={num_options}")

    num_episodes = 10
    total_training_reward = 0
    start_time = time.time()
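
    # Each episode runs one complete search for a randomly chosen query.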
    for episode in range(num_episodes):
        query = random.choice(QUERIES)
        logger.info(f"Starting episode {episode + 1}/{num_episodes} with query: {query}")
        episode_start_time = time.time()

        try:
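            # Give each search up to 300 seconds before cancelling it.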
            search_deferred = agent.search(query)
            search_deferred.addTimeout(300, reactor)
            total_reward = yield search_deferred
            total_training_reward += total_reward
            episode_duration = time.time() - episode_start_time
            logger.info(f"Episode {episode + 1}/{num_episodes}, Query: {query}, Total Reward: {total_reward}, Duration: {episode_duration:.2f} seconds")
        except defer.TimeoutError:
            logger.error(f"Episode {episode + 1} timed out")
            total_reward = -1
            total_training_reward += total_reward
        except Exception as e:
            logger.error(f"Error in episode {episode + 1}: {str(e)}", exc_info=True)
            total_reward = -1
            total_training_reward += total_reward
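
        # Sync the target networks with the online networks every 10 episodes
        # (with num_episodes=10, this fires once, after the final episode).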
        if (episode + 1) % 10 == 0:
            logger.info(f"Updating target models at episode {episode + 1}")
            agent.update_worker_target_model()
            agent.update_manager_target_model()
            agent.manager.update_target_model()
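
        # Linear extrapolation of total wall-clock time from progress so far.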
        progress = (episode + 1) / num_episodes
        elapsed_time = time.time() - start_time
        estimated_total_time = elapsed_time / progress
        remaining_time = estimated_total_time - elapsed_time
        logger.info(f"Overall progress: {progress:.2%}, Elapsed time: {elapsed_time:.2f}s, Estimated remaining time: {remaining_time:.2f}s")

    total_training_time = time.time() - start_time
    average_reward = total_training_reward / num_episodes
    logger.info(f"Training completed. Total reward: {total_training_reward}, Average reward per episode: {average_reward:.2f}")
    logger.info(f"Total training time: {total_training_time:.2f} seconds")
    logger.info("Saving models.")
    agent.save_worker_model("worker_model.pth")
    agent.save_manager_model("manager_model.pth")
    agent.save("web_agent_model.pth")
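
    # Stop the reactor if it is still running; main()'s addBoth callback
    # performs the same check, so the reactor is stopped exactly once.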
    if reactor.running:
        logger.info("Stopping reactor")
        reactor.stop()


def main():
    logger.info("Starting agent training")
    d = task.deferLater(reactor, 0, train_agent)
    d.addErrback(lambda failure: logger.error(f"An error occurred: {failure}", exc_info=True))
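    # Guard the final stop: train_agent() may already have stopped the reactor,
    # and calling reactor.stop() twice raises ReactorNotRunning.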
    d.addBoth(lambda _: reactor.stop() if reactor.running else None)
    reactor.run()


if __name__ == "__main__":
    main()