% Spaces:
% Running
% Running
\begin{thebibliography}{10}
\bibitem{2303.08631}
D.~Barber.
\newblock Smoothed Q-learning.
\newblock {\em arXiv preprint arXiv:2303.08631}, 2023.
\bibitem{1811.09013}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:1811.09013.
E.~Imani, E.~Graves, and M.~White.
\newblock An off-policy policy gradient theorem using emphatic weightings.
\newblock {\em arXiv preprint arXiv:1811.09013}, 2018.
\bibitem{2202.05135}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:2202.05135.
K.~Wu and X.-J. Zeng.
\newblock Group-agent reinforcement learning.
\newblock {\em arXiv preprint arXiv:2202.05135}, 2022.
\bibitem{2001.09608}
C.~Li.
\newblock Some insights into lifelong reinforcement learning systems.
\newblock {\em arXiv preprint arXiv:2001.09608}, 2020.
\bibitem{2106.14642}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:2106.14642.
L.~Meng, A.~Yazidi, M.~Goodwin, and P.~Engelstad.
\newblock Expert Q-learning: Deep reinforcement learning with coarse state
values from offline expert examples.
\newblock {\em arXiv preprint arXiv:2106.14642}, 2021.
\bibitem{2108.11510}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:2108.11510.
N.~Le, V.~S. Rathour, K.~Yamazaki, K.~Luu, and M.~Savvides.
\newblock Deep reinforcement learning in computer vision: A comprehensive
survey.
\newblock {\em arXiv preprint arXiv:2108.11510}, 2021.
\bibitem{2212.00253}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:2212.00253.
Q.~Yin, T.~Yu, S.~Shen, J.~Yang, M.~Zhao, K.~Huang, B.~Liang, and L.~Wang.
\newblock Distributed deep reinforcement learning: A survey and a multi-player
multi-agent learning toolbox.
\newblock {\em arXiv preprint arXiv:2212.00253}, 2022.
\bibitem{2012.01100}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:2012.01100.
R.~Zhu and M.~Rigotti.
\newblock Self-correcting Q-learning.
\newblock {\em arXiv preprint arXiv:2012.01100}, 2020.
\bibitem{1911.09048}
J.~Schmidt.
\newblock Morphisms of networks of hybrid open systems.
\newblock {\em arXiv preprint arXiv:1911.09048}, 2019.
\bibitem{2209.01820}
W.~J.~A. van Heeswijk.
\newblock Natural policy gradients in reinforcement learning explained.
\newblock {\em arXiv preprint arXiv:2209.01820}, 2022.
\bibitem{2106.01134}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:2106.01134.
W.~Liao, X.~Wei, and J.~Lai.
\newblock Smooth Q-learning: Accelerate convergence of Q-learning using
similarity.
\newblock {\em arXiv preprint arXiv:2106.01134}, 2021.
\bibitem{1703.02102}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:1703.02102.
Y.~Okesanjo and V.~Kofia.
\newblock Revisiting stochastic off-policy action-value gradients.
\newblock {\em arXiv preprint arXiv:1703.02102}, 2017.
\bibitem{2009.07888}
% NOTE(review): author list rebuilt from mangled initials; verify against arXiv:2009.07888.
Z.~Zhu, K.~Lin, A.~K. Jain, and J.~Zhou.
\newblock Transfer learning in deep reinforcement learning: A survey.
\newblock {\em arXiv preprint arXiv:2009.07888}, 2020.
\end{thebibliography}