gomoku / DI-engine /ding /design /serial_main.puml
zjowowen's picture
init space
079c32c
@startuml serial_main
' Sequence diagram of the serial pipeline's main flow: component
' initialization, optional random collection, the train loop, and teardown.
' Page header and diagram title shown on the rendered output.
header Serial Pipeline
title Serial Main
' Lifelines are drawn left to right in the order they are declared here.
participant controller
participant env_manager
participant policy
participant learner
participant replay_buffer
participant collector
participant evaluator
participant commander
' Prefix every subsequent message with an automatically incremented number.
autonumber
' Initialization phase: the controller creates each component in turn.
' The learner, collector, evaluator and commander are each given a different
' mode of the policy (learn / collect / eval / command); the collector and
' evaluator are additionally given an env_manager.
controller -> env_manager: init collector and evaluator env_manager; set seed
controller -> policy: init policy
controller -> learner: init learner; set learn_mode policy
controller -> collector: init collector; set collect_mode policy; set env_manager
controller -> evaluator: init evaluator; set eval_mode policy; set env_manager
controller -> commander: init commander; set command_mode policy
controller -> replay_buffer: init replay_buffer
' Optional warm-up: when random collection is enabled, the collector
' temporarily swaps in a random policy, pushes the generated data into the
' replay buffer, and then restores the original policy before training.
alt random collect before training starts
collector -> collector: reset policy to random one; generate random data
collector -> replay_buffer: push_data
collector -> collector: reset policy back to the original one
end
' Main training phase: the learner's before_run hook, the train loop, and
' finally the learner's after_run hook.
learner -> learner: call before_run hook
' The outer loop carries no explicit bound in this diagram; the only exit
' shown is the "reach eval stop_value" branch of the evaluation step.
loop
    commander -> commander: step
    ' Evaluation runs only on iterations that require it. Reaching the stop
    ' value saves a checkpoint and exits; otherwise a checkpoint is saved
    ' only when the episode return is a new best.
    alt this iteration needs evaluation
        evaluator -> evaluator: eval_performance
        alt reach eval stop_value
            learner -> learner: save checkpoint and exit
        else episode_return is new highest
            learner -> learner: save checkpoint
        end
    end
    ' Collect fresh data and store it in the replay buffer.
    collector -> collector: generate data (steps or episodes)
    collector -> replay_buffer: push_data
    ' Inner loop: several learner updates per collection step.
    loop learner_train_iteration times
        replay_buffer -> learner: sample_data
        learner -> learner: train
        ' Priority feedback is sent back only for a prioritized buffer.
        alt replay_buffer uses prioritization
            learner -> replay_buffer: update with priority_info
        end
    end
    ' On-policy training discards the collected data after each iteration.
    alt on_policy training
        replay_buffer -> replay_buffer: clear
    end
end
learner -> learner: call after_run hook
' Teardown: the controller closes the replay buffer, learner, collector and
' evaluator, in that order.
' NOTE(review): no close message is drawn for commander, policy or
' env_manager - confirm whether that is intentional or an omission.
controller -> replay_buffer: close replay_buffer
controller -> learner: close learner
controller -> collector: close collector
controller -> evaluator: close evaluator
@enduml