---
# Run configuration split into two top-level sections: `general_cfg` and
# `algo_cfg`. The meaning of each key is defined by the code that loads this
# file, which is not visible here; comments below are hedged accordingly.

# General settings (algorithm/environment names, device, episode counts, ...).
general_cfg:
  algo_name: MO-QLearning
  device: cpu
  env_name: deep-sea-treasure-v0
  eval_eps: 10
  eval_per_episode: 5
  # NOTE(review): `mode` is `test` while `load_checkpoint` is false - confirm
  # the consumer does not need a checkpoint from `load_path` in this mode.
  load_checkpoint: false
  load_path: tasks
  max_steps: 200
  mode: test
  new_step_api: true
  render: false
  save_fig: true
  seed: 0
  show_fig: false
  test_eps: 20
  train_eps: 100
  # Explicit null: no wrapper is configured.
  wrapper: null

# Algorithm settings; presumably hyperparameters of the algorithm named by
# `general_cfg.algo_name` - verify against the loader.
algo_cfg:
  epsilon_decay: 300
  epsilon_end: 0.01
  epsilon_start: 0.95
  gamma: 0.9
  lr: 0.1
  # Two equal entries; presumably one weight per objective - TODO confirm.
  weights:
    - 0.5
    - 0.5