File size: 2,331 Bytes
238735e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
\begin{thebibliography}{10}

\bibitem{2303.08631}
D.~Barber.
\newblock Smoothed Q-learning.
\newblock {\em arXiv preprint arXiv:2303.08631}, 2023.

\bibitem{1811.09013}
E.~Imani, E.~Graves, and M.~White.
\newblock An off-policy policy gradient theorem using emphatic weightings.
\newblock {\em arXiv preprint arXiv:1811.09013}, 2018.

\bibitem{2202.05135}
K.~Wu and X.-J. Zeng.
\newblock Group-agent reinforcement learning.
\newblock {\em arXiv preprint arXiv:2202.05135}, 2022.

\bibitem{2001.09608}
C.~Li.
\newblock Some insights into lifelong reinforcement learning systems.
\newblock {\em arXiv preprint arXiv:2001.09608}, 2020.

\bibitem{2106.14642}
L.~Meng, A.~Yazidi, M.~Goodwin, and P.~Engelstad.
\newblock Expert Q-learning: Deep reinforcement learning with coarse state
  values from offline expert examples.
\newblock {\em arXiv preprint arXiv:2106.14642}, 2021.

\bibitem{2108.11510}
N.~Le, V.~S. Rathour, K.~Yamazaki, K.~Luu, and M.~Savvides.
\newblock Deep reinforcement learning in computer vision: A comprehensive
  survey.
\newblock {\em arXiv preprint arXiv:2108.11510}, 2021.

\bibitem{2212.00253}
Q.~Yin, T.~Yu, S.~Shen, J.~Yang, M.~Zhao, K.~Huang, B.~Liang, and L.~Wang.
\newblock Distributed deep reinforcement learning: A survey and a multi-player
  multi-agent learning toolbox.
\newblock {\em arXiv preprint arXiv:2212.00253}, 2022.

\bibitem{2012.01100}
R.~Zhu and M.~Rigotti.
\newblock Self-correcting Q-learning.
\newblock {\em arXiv preprint arXiv:2012.01100}, 2020.

\bibitem{1911.09048}
J.~Schmidt.
\newblock Morphisms of networks of hybrid open systems.
\newblock {\em arXiv preprint arXiv:1911.09048}, 2019.

\bibitem{2209.01820}
W.~J.~A. van Heeswijk.
\newblock Natural policy gradients in reinforcement learning explained.
\newblock {\em arXiv preprint arXiv:2209.01820}, 2022.

\bibitem{2106.01134}
W.~Liao, X.~Wei, and J.~Lai.
\newblock Smooth Q-learning: Accelerate convergence of Q-learning using
  similarity.
\newblock {\em arXiv preprint arXiv:2106.01134}, 2021.

\bibitem{1703.02102}
Y.~Okesanjo and V.~Kofia.
\newblock Revisiting stochastic off-policy action-value gradients.
\newblock {\em arXiv preprint arXiv:1703.02102}, 2017.

\bibitem{2009.07888}
Z.~Zhu, K.~Lin, A.~K. Jain, and J.~Zhou.
\newblock Transfer learning in deep reinforcement learning: A survey.
\newblock {\em arXiv preprint arXiv:2009.07888}, 2020.

\end{thebibliography}