% Bibliography of arXiv preprints on reinforcement learning.
%
% Conventions used in this block:
%   * The citation key of every entry is its arXiv identifier.
%   * Each key is defined exactly once; byte-identical repeated copies of the
%     same entry were removed, keeping the order of first appearance.
%   * Authors are separated by " and " and written as "von Last, First".
%   * Case-sensitive title words (Q-learning, AI) are brace-protected so that
%     sentence-casing styles do not lowercase them.
%   * The journal, year and url values are kept exactly as originally recorded.

@article{2001.09608,
  author  = {Li, Changjian},
  title   = {Some Insights into Lifelong Reinforcement Learning Systems},
  journal = {arXiv preprint arXiv:2001.09608},
  year    = {2020},
  url     = {http://arxiv.org/abs/2001.09608v1},
}

@article{2108.11510,
  author  = {Le, Ngan and Rathour, Vidhiwar Singh and Yamazaki, Kashu and Luu, Khoa and Savvides, Marios},
  title   = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey},
  journal = {arXiv preprint arXiv:2108.11510},
  year    = {2021},
  url     = {http://arxiv.org/abs/2108.11510v1},
}

@article{2202.05135,
  author  = {Wu, Kaiyue and Zeng, Xiao-Jun},
  title   = {Group-Agent Reinforcement Learning},
  journal = {arXiv preprint arXiv:2202.05135},
  year    = {2022},
  url     = {http://arxiv.org/abs/2202.05135v3},
}

@article{2212.00253,
  author  = {Yin, Qiyue and Yu, Tongtong and Shen, Shengqi and Yang, Jun and Zhao, Meijing and Huang, Kaiqi and Liang, Bin and Wang, Liang},
  title   = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox},
  journal = {arXiv preprint arXiv:2212.00253},
  year    = {2022},
  url     = {http://arxiv.org/abs/2212.00253v1},
}

@article{2009.07888,
  author  = {Zhu, Zhuangdi and Lin, Kaixiang and Jain, Anil K. and Zhou, Jiayu},
  title   = {Transfer Learning in Deep Reinforcement Learning: A Survey},
  journal = {arXiv preprint arXiv:2009.07888},
  year    = {2020},
  url     = {http://arxiv.org/abs/2009.07888v5},
}

@article{2303.08631,
  author  = {Barber, David},
  title   = {Smoothed {Q-learning}},
  journal = {arXiv preprint arXiv:2303.08631},
  year    = {2023},
  url     = {http://arxiv.org/abs/2303.08631v1},
}

@article{2106.14642,
  author  = {Meng, Li and Yazidi, Anis and Goodwin, Morten and Engelstad, Paal},
  title   = {Expert {Q-learning}: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples},
  journal = {arXiv preprint arXiv:2106.14642},
  year    = {2021},
  url     = {http://arxiv.org/abs/2106.14642v3},
}

@article{2106.01134,
  author  = {Liao, Wei and Wei, Xiaohui and Lai, Jizhou},
  title   = {Smooth {Q-learning}: Accelerate Convergence of {Q-learning} Using Similarity},
  journal = {arXiv preprint arXiv:2106.01134},
  year    = {2021},
  url     = {http://arxiv.org/abs/2106.01134v1},
}

@article{2012.01100,
  author  = {Zhu, Rong and Rigotti, Mattia},
  title   = {Self-correcting {Q-Learning}},
  journal = {arXiv preprint arXiv:2012.01100},
  year    = {2020},
  url     = {http://arxiv.org/abs/2012.01100v2},
}

@article{1703.02102,
  author  = {Okesanjo, Yemi and Kofia, Victor},
  title   = {Revisiting stochastic off-policy action-value gradients},
  journal = {arXiv preprint arXiv:1703.02102},
  year    = {2017},
  url     = {http://arxiv.org/abs/1703.02102v2},
}

@article{2209.01820,
  author  = {van Heeswijk, W. J. A.},
  title   = {Natural Policy Gradients In Reinforcement Learning Explained},
  journal = {arXiv preprint arXiv:2209.01820},
  year    = {2022},
  url     = {http://arxiv.org/abs/2209.01820v1},
}

@article{1811.09013,
  author  = {Imani, Ehsan and Graves, Eric and White, Martha},
  title   = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
  journal = {arXiv preprint arXiv:1811.09013},
  year    = {2018},
  url     = {http://arxiv.org/abs/1811.09013v2},
}

@article{1911.04817,
  author  = {K{\"a}mmerer, Mattis Manfred},
  title   = {On Policy Gradients},
  journal = {arXiv preprint arXiv:1911.04817},
  year    = {2019},
  url     = {http://arxiv.org/abs/1911.04817v1},
}

@article{1709.05067,
  author  = {Jadeja, Mahipal and Varia, Neelanshi and Shah, Agam},
  title   = {Deep Reinforcement Learning for Conversational {AI}},
  journal = {arXiv preprint arXiv:1709.05067},
  year    = {2017},
  url     = {http://arxiv.org/abs/1709.05067v1},
}

% NOTE(review): the entry that starts on the next line continues past this
% part of the file and re-uses key 2202.05135, which is already defined above.
% It is left untouched here; if the remainder is another identical copy,
% remove it so the key stays unique.
@article{2202.05135, title = {Group-Agent 
Reinforcement Learning}, author = {Kaiyue Wu , Xiao-Jun Zeng}, journal={arXiv preprint arXiv:2202.05135}, year = {2022}, url = {http://arxiv.org/abs/2202.05135v3} } @article{2212.00253, title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox}, author = {Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang}, journal={arXiv preprint arXiv:2212.00253}, year = {2022}, url = {http://arxiv.org/abs/2212.00253v1} } @article{2009.07888, title = {Transfer Learning in Deep Reinforcement Learning: A Survey}, author = {Zhuangdi Zhu , Kaixiang Lin , Anil K. Jain , Jiayu Zhou}, journal={arXiv preprint arXiv:2009.07888}, year = {2020}, url = {http://arxiv.org/abs/2009.07888v5} } @article{2303.08631, title = {Smoothed Q-learning}, author = {David Barber}, journal={arXiv preprint arXiv:2303.08631}, year = {2023}, url = {http://arxiv.org/abs/2303.08631v1} } @article{2106.14642, title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples}, author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad}, journal={arXiv preprint arXiv:2106.14642}, year = {2021}, url = {http://arxiv.org/abs/2106.14642v3} } @article{2106.01134, title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity}, author = {Wei Liao , Xiaohui Wei , Jizhou Lai}, journal={arXiv preprint arXiv:2106.01134}, year = {2021}, url = {http://arxiv.org/abs/2106.01134v1} } @article{2012.01100, title = {Self-correcting Q-Learning}, author = {Rong Zhu , Mattia Rigotti}, journal={arXiv preprint arXiv:2012.01100}, year = {2020}, url = {http://arxiv.org/abs/2012.01100v2} } @article{1703.02102, title = {Revisiting stochastic off-policy action-value gradients}, author = {Yemi Okesanjo , Victor Kofia}, journal={arXiv preprint arXiv:1703.02102}, year = {2017}, url = {http://arxiv.org/abs/1703.02102v2} } @article{2209.01820, title = {Natural Policy Gradients In 
Reinforcement Learning Explained}, author = {W. J. A. van Heeswijk}, journal={arXiv preprint arXiv:2209.01820}, year = {2022}, url = {http://arxiv.org/abs/2209.01820v1} } @article{1811.09013, title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings}, author = {Ehsan Imani , Eric Graves , Martha White}, journal={arXiv preprint arXiv:1811.09013}, year = {2018}, url = {http://arxiv.org/abs/1811.09013v2} } @article{1911.04817, title = {On Policy Gradients}, author = {Mattis Manfred Kämmerer}, journal={arXiv preprint arXiv:1911.04817}, year = {2019}, url = {http://arxiv.org/abs/1911.04817v1} } @article{2108.11510, title = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey}, author = {Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides}, journal={arXiv preprint arXiv:2108.11510}, year = {2021}, url = {http://arxiv.org/abs/2108.11510v1} } @article{2212.00253, title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox}, author = {Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang}, journal={arXiv preprint arXiv:2212.00253}, year = {2022}, url = {http://arxiv.org/abs/2212.00253v1} } @article{1709.05067, title = {Deep Reinforcement Learning for Conversational AI}, author = {Mahipal Jadeja , Neelanshi Varia , Agam Shah}, journal={arXiv preprint arXiv:1709.05067}, year = {2017}, url = {http://arxiv.org/abs/1709.05067v1} } @article{1009.2998, title = {Compact integral manifolds of differential systems}, author = {V. N. 
Gorbuzov}, journal={arXiv preprint arXiv:1009.2998}, year = {2010}, url = {http://arxiv.org/abs/1009.2998v1} } @article{2001.09608, title = {Some Insights into Lifelong Reinforcement Learning Systems}, author = {Changjian Li}, journal={arXiv preprint arXiv:2001.09608}, year = {2020}, url = {http://arxiv.org/abs/2001.09608v1} } @article{2108.11510, title = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey}, author = {Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides}, journal={arXiv preprint arXiv:2108.11510}, year = {2021}, url = {http://arxiv.org/abs/2108.11510v1} } @article{2202.05135, title = {Group-Agent Reinforcement Learning}, author = {Kaiyue Wu , Xiao-Jun Zeng}, journal={arXiv preprint arXiv:2202.05135}, year = {2022}, url = {http://arxiv.org/abs/2202.05135v3} } @article{2212.00253, title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox}, author = {Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang}, journal={arXiv preprint arXiv:2212.00253}, year = {2022}, url = {http://arxiv.org/abs/2212.00253v1} } @article{2009.07888, title = {Transfer Learning in Deep Reinforcement Learning: A Survey}, author = {Zhuangdi Zhu , Kaixiang Lin , Anil K. 
Jain , Jiayu Zhou}, journal={arXiv preprint arXiv:2009.07888}, year = {2020}, url = {http://arxiv.org/abs/2009.07888v5} } @article{2303.08631, title = {Smoothed Q-learning}, author = {David Barber}, journal={arXiv preprint arXiv:2303.08631}, year = {2023}, url = {http://arxiv.org/abs/2303.08631v1} } @article{2106.14642, title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples}, author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad}, journal={arXiv preprint arXiv:2106.14642}, year = {2021}, url = {http://arxiv.org/abs/2106.14642v3} } @article{2106.01134, title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity}, author = {Wei Liao , Xiaohui Wei , Jizhou Lai}, journal={arXiv preprint arXiv:2106.01134}, year = {2021}, url = {http://arxiv.org/abs/2106.01134v1} } @article{2012.01100, title = {Self-correcting Q-Learning}, author = {Rong Zhu , Mattia Rigotti}, journal={arXiv preprint arXiv:2012.01100}, year = {2020}, url = {http://arxiv.org/abs/2012.01100v2} } @article{1703.02102, title = {Revisiting stochastic off-policy action-value gradients}, author = {Yemi Okesanjo , Victor Kofia}, journal={arXiv preprint arXiv:1703.02102}, year = {2017}, url = {http://arxiv.org/abs/1703.02102v2} } @article{2209.01820, title = {Natural Policy Gradients In Reinforcement Learning Explained}, author = {W. J. A. 
van Heeswijk}, journal={arXiv preprint arXiv:2209.01820}, year = {2022}, url = {http://arxiv.org/abs/2209.01820v1} } @article{1811.09013, title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings}, author = {Ehsan Imani , Eric Graves , Martha White}, journal={arXiv preprint arXiv:1811.09013}, year = {2018}, url = {http://arxiv.org/abs/1811.09013v2} } @article{1911.04817, title = {On Policy Gradients}, author = {Mattis Manfred Kämmerer}, journal={arXiv preprint arXiv:1911.04817}, year = {2019}, url = {http://arxiv.org/abs/1911.04817v1} } @article{2108.11510, title = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey}, author = {Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides}, journal={arXiv preprint arXiv:2108.11510}, year = {2021}, url = {http://arxiv.org/abs/2108.11510v1} } @article{2212.00253, title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox}, author = {Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang}, journal={arXiv preprint arXiv:2212.00253}, year = {2022}, url = {http://arxiv.org/abs/2212.00253v1} } @article{1709.05067, title = {Deep Reinforcement Learning for Conversational AI}, author = {Mahipal Jadeja , Neelanshi Varia , Agam Shah}, journal={arXiv preprint arXiv:1709.05067}, year = {2017}, url = {http://arxiv.org/abs/1709.05067v1} } @article{1009.2998, title = {Compact integral manifolds of differential systems}, author = {V. N. Gorbuzov}, journal={arXiv preprint arXiv:1009.2998}, year = {2010}, url = {http://arxiv.org/abs/1009.2998v1} } @article{1911.09048, title = {Morphisms of Networks of Hybrid Open Systems}, author = {James Schmidt}, journal={arXiv preprint arXiv:1911.09048}, year = {2019}, url = {http://arxiv.org/abs/1911.09048v2} }