% Deduplicated bibliography: each arXiv work appears exactly once (duplicate keys
% are errors under Biber and produce "repeated entry" warnings under BibTeX).
% Authors normalized to "von Last, First" joined by " and "; proper nouns and
% acronyms braced against sentence-casing styles; arXiv ids exposed via eprint.

@article{1512.07669,
  author        = {Krishnamurthy, Vikram},
  title         = {Reinforcement Learning: Stochastic Approximation Algorithms for {Markov} Decision Processes},
  journal       = {arXiv preprint arXiv:1512.07669},
  year          = {2015},
  eprint        = {1512.07669},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1512.07669v1},
}

@article{1511.02377,
  author        = {Lehrer, Ehud and Solan, Eilon and Solan, Omri N.},
  title         = {The Value Functions of {Markov} Decision Processes},
  journal       = {arXiv preprint arXiv:1511.02377},
  year          = {2015},
  eprint        = {1511.02377},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1511.02377v1},
}

@article{1512.09075,
  author        = {Thomas, Philip S. and Okal, Billy},
  title         = {A Notation for {Markov} Decision Processes},
  journal       = {arXiv preprint arXiv:1512.09075},
  year          = {2015},
  eprint        = {1512.09075},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1512.09075v2},
}

@article{2008.10426,
  author        = {Bertrand, Nathalie and Bouyer, Patricia and Brihaye, Thomas and Fournier, Paulin},
  title         = {Taming Denumerable {Markov} Decision Processes with Decisiveness},
  journal       = {arXiv preprint arXiv:2008.10426},
  year          = {2020},
  eprint        = {2008.10426},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2008.10426v1},
}

@article{0711.2185,
  author        = {Leizarowitz, Arie and Shwartz, Adam},
  title         = {Exact Finite Approximations of Average-Cost Countable {Markov} Decision Processes},
  journal       = {arXiv preprint arXiv:0711.2185},
  year          = {2007},
  eprint        = {0711.2185},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/0711.2185v1},
}

@article{2303.08631,
  author        = {Barber, David},
  title         = {Smoothed {Q-learning}},
  journal       = {arXiv preprint arXiv:2303.08631},
  year          = {2023},
  eprint        = {2303.08631},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2303.08631v1},
}

@article{2106.14642,
  author        = {Meng, Li and Yazidi, Anis and Goodwin, Morten and Engelstad, Paal},
  title         = {Expert {Q-learning}: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples},
  journal       = {arXiv preprint arXiv:2106.14642},
  year          = {2021},
  eprint        = {2106.14642},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2106.14642v3},
}

@article{2106.01134,
  author        = {Liao, Wei and Wei, Xiaohui and Lai, Jizhou},
  title         = {Smooth {Q-learning}: Accelerate Convergence of {Q-learning} Using Similarity},
  journal       = {arXiv preprint arXiv:2106.01134},
  year          = {2021},
  eprint        = {2106.01134},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2106.01134v1},
}

@article{2012.01100,
  author        = {Zhu, Rong and Rigotti, Mattia},
  title         = {Self-Correcting {Q-Learning}},
  journal       = {arXiv preprint arXiv:2012.01100},
  year          = {2020},
  eprint        = {2012.01100},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2012.01100v2},
}

@article{1703.02102,
  author        = {Okesanjo, Yemi and Kofia, Victor},
  title         = {Revisiting Stochastic Off-Policy Action-Value Gradients},
  journal       = {arXiv preprint arXiv:1703.02102},
  year          = {2017},
  eprint        = {1703.02102},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1703.02102v2},
}

@article{2209.01820,
  author        = {van Heeswijk, W. J. A.},
  title         = {Natural Policy Gradients in Reinforcement Learning Explained},
  journal       = {arXiv preprint arXiv:2209.01820},
  year          = {2022},
  eprint        = {2209.01820},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2209.01820v1},
}

@article{1811.09013,
  author        = {Imani, Ehsan and Graves, Eric and White, Martha},
  title         = {An Off-Policy Policy Gradient Theorem Using Emphatic Weightings},
  journal       = {arXiv preprint arXiv:1811.09013},
  year          = {2018},
  eprint        = {1811.09013},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1811.09013v2},
}

@article{1911.04817,
  author        = {K{\"a}mmerer, Mattis Manfred},
  title         = {On Policy Gradients},
  journal       = {arXiv preprint arXiv:1911.04817},
  year          = {2019},
  eprint        = {1911.04817},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1911.04817v1},
}

@article{2108.11510,
  author        = {Le, Ngan and Rathour, Vidhiwar Singh and Yamazaki, Kashu and Luu, Khoa and Savvides, Marios},
  title         = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey},
  journal       = {arXiv preprint arXiv:2108.11510},
  year          = {2021},
  eprint        = {2108.11510},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2108.11510v1},
}

@article{2212.00253,
  author        = {Yin, Qiyue and Yu, Tongtong and Shen, Shengqi and Yang, Jun and Zhao, Meijing and Huang, Kaiqi and Liang, Bin and Wang, Liang},
  title         = {Distributed Deep Reinforcement Learning: A Survey and a Multi-Player Multi-Agent Learning Toolbox},
  journal       = {arXiv preprint arXiv:2212.00253},
  year          = {2022},
  eprint        = {2212.00253},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2212.00253v1},
}

@article{1709.05067,
  author        = {Jadeja, Mahipal and Varia, Neelanshi and Shah, Agam},
  title         = {Deep Reinforcement Learning for Conversational {AI}},
  journal       = {arXiv preprint arXiv:1709.05067},
  year          = {2017},
  eprint        = {1709.05067},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1709.05067v1},
}

@article{1708.05866,
  author        = {Arulkumaran, Kai and Deisenroth, Marc Peter and Brundage, Miles and Bharath, Anil Anthony},
  title         = {A Brief Survey of Deep Reinforcement Learning},
  journal       = {arXiv preprint arXiv:1708.05866},
  year          = {2017},
  eprint        = {1708.05866},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1708.05866v2},
}
{Vikram Krishnamurthy}, journal={arXiv preprint arXiv:1512.07669}, year = {2015}, url = {http://arxiv.org/abs/1512.07669v1} } @article{1511.02377, title = {The Value Functions of Markov Decision Processes}, author = {Ehud Lehrer , Eilon Solan , Omri N. Solan}, journal={arXiv preprint arXiv:1511.02377}, year = {2015}, url = {http://arxiv.org/abs/1511.02377v1} } @article{1512.09075, title = {A Notation for Markov Decision Processes}, author = {Philip S. Thomas , Billy Okal}, journal={arXiv preprint arXiv:1512.09075}, year = {2015}, url = {http://arxiv.org/abs/1512.09075v2} } @article{2008.10426, title = {Taming denumerable Markov decision processes with decisiveness}, author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier}, journal={arXiv preprint arXiv:2008.10426}, year = {2020}, url = {http://arxiv.org/abs/2008.10426v1} } @article{0711.2185, title = {Exact finite approximations of average-cost countable Markov Decision Processes}, author = {Arie Leizarowitz , Adam Shwartz}, journal={arXiv preprint arXiv:0711.2185}, year = {2007}, url = {http://arxiv.org/abs/0711.2185v1} } @article{2303.08631, title = {Smoothed Q-learning}, author = {David Barber}, journal={arXiv preprint arXiv:2303.08631}, year = {2023}, url = {http://arxiv.org/abs/2303.08631v1} } @article{2106.14642, title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples}, author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad}, journal={arXiv preprint arXiv:2106.14642}, year = {2021}, url = {http://arxiv.org/abs/2106.14642v3} } @article{2106.01134, title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity}, author = {Wei Liao , Xiaohui Wei , Jizhou Lai}, journal={arXiv preprint arXiv:2106.01134}, year = {2021}, url = {http://arxiv.org/abs/2106.01134v1} } @article{2012.01100, title = {Self-correcting Q-Learning}, author = {Rong Zhu , Mattia Rigotti}, journal={arXiv preprint arXiv:2012.01100}, year = 
{2020}, url = {http://arxiv.org/abs/2012.01100v2} } @article{1703.02102, title = {Revisiting stochastic off-policy action-value gradients}, author = {Yemi Okesanjo , Victor Kofia}, journal={arXiv preprint arXiv:1703.02102}, year = {2017}, url = {http://arxiv.org/abs/1703.02102v2} } @article{2209.01820, title = {Natural Policy Gradients In Reinforcement Learning Explained}, author = {W. J. A. van Heeswijk}, journal={arXiv preprint arXiv:2209.01820}, year = {2022}, url = {http://arxiv.org/abs/2209.01820v1} } @article{1811.09013, title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings}, author = {Ehsan Imani , Eric Graves , Martha White}, journal={arXiv preprint arXiv:1811.09013}, year = {2018}, url = {http://arxiv.org/abs/1811.09013v2} } @article{1911.04817, title = {On Policy Gradients}, author = {Mattis Manfred Kämmerer}, journal={arXiv preprint arXiv:1911.04817}, year = {2019}, url = {http://arxiv.org/abs/1911.04817v1} } @article{2108.11510, title = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey}, author = {Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides}, journal={arXiv preprint arXiv:2108.11510}, year = {2021}, url = {http://arxiv.org/abs/2108.11510v1} } @article{2212.00253, title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox}, author = {Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang}, journal={arXiv preprint arXiv:2212.00253}, year = {2022}, url = {http://arxiv.org/abs/2212.00253v1} } @article{1709.05067, title = {Deep Reinforcement Learning for Conversational AI}, author = {Mahipal Jadeja , Neelanshi Varia , Agam Shah}, journal={arXiv preprint arXiv:1709.05067}, year = {2017}, url = {http://arxiv.org/abs/1709.05067v1} } @article{1708.05866, title = {A Brief Survey of Deep Reinforcement Learning}, author = {Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony 
Bharath}, journal={arXiv preprint arXiv:1708.05866}, year = {2017}, url = {http://arxiv.org/abs/1708.05866v2} } @article{1906.10025, title = {Modern Deep Reinforcement Learning Algorithms}, author = {Sergey Ivanov , Alexander D'yakonov}, journal={arXiv preprint arXiv:1906.10025}, year = {2019}, url = {http://arxiv.org/abs/1906.10025v2} } @article{1512.07669, title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov Decision Processes}, author = {Vikram Krishnamurthy}, journal={arXiv preprint arXiv:1512.07669}, year = {2015}, url = {http://arxiv.org/abs/1512.07669v1} } @article{1511.02377, title = {The Value Functions of Markov Decision Processes}, author = {Ehud Lehrer , Eilon Solan , Omri N. Solan}, journal={arXiv preprint arXiv:1511.02377}, year = {2015}, url = {http://arxiv.org/abs/1511.02377v1} } @article{1512.09075, title = {A Notation for Markov Decision Processes}, author = {Philip S. Thomas , Billy Okal}, journal={arXiv preprint arXiv:1512.09075}, year = {2015}, url = {http://arxiv.org/abs/1512.09075v2} } @article{2008.10426, title = {Taming denumerable Markov decision processes with decisiveness}, author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier}, journal={arXiv preprint arXiv:2008.10426}, year = {2020}, url = {http://arxiv.org/abs/2008.10426v1} } @article{0711.2185, title = {Exact finite approximations of average-cost countable Markov Decision Processes}, author = {Arie Leizarowitz , Adam Shwartz}, journal={arXiv preprint arXiv:0711.2185}, year = {2007}, url = {http://arxiv.org/abs/0711.2185v1} } @article{2303.08631, title = {Smoothed Q-learning}, author = {David Barber}, journal={arXiv preprint arXiv:2303.08631}, year = {2023}, url = {http://arxiv.org/abs/2303.08631v1} } @article{2106.14642, title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples}, author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad}, journal={arXiv preprint 
arXiv:2106.14642}, year = {2021}, url = {http://arxiv.org/abs/2106.14642v3} } @article{2106.01134, title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity}, author = {Wei Liao , Xiaohui Wei , Jizhou Lai}, journal={arXiv preprint arXiv:2106.01134}, year = {2021}, url = {http://arxiv.org/abs/2106.01134v1} } @article{2012.01100, title = {Self-correcting Q-Learning}, author = {Rong Zhu , Mattia Rigotti}, journal={arXiv preprint arXiv:2012.01100}, year = {2020}, url = {http://arxiv.org/abs/2012.01100v2} } @article{1703.02102, title = {Revisiting stochastic off-policy action-value gradients}, author = {Yemi Okesanjo , Victor Kofia}, journal={arXiv preprint arXiv:1703.02102}, year = {2017}, url = {http://arxiv.org/abs/1703.02102v2} } @article{2209.01820, title = {Natural Policy Gradients In Reinforcement Learning Explained}, author = {W. J. A. van Heeswijk}, journal={arXiv preprint arXiv:2209.01820}, year = {2022}, url = {http://arxiv.org/abs/2209.01820v1} } @article{1811.09013, title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings}, author = {Ehsan Imani , Eric Graves , Martha White}, journal={arXiv preprint arXiv:1811.09013}, year = {2018}, url = {http://arxiv.org/abs/1811.09013v2} } @article{1911.04817, title = {On Policy Gradients}, author = {Mattis Manfred Kämmerer}, journal={arXiv preprint arXiv:1911.04817}, year = {2019}, url = {http://arxiv.org/abs/1911.04817v1} } @article{2108.11510, title = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey}, author = {Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides}, journal={arXiv preprint arXiv:2108.11510}, year = {2021}, url = {http://arxiv.org/abs/2108.11510v1} } @article{2212.00253, title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox}, author = {Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang}, journal={arXiv preprint 
arXiv:2212.00253}, year = {2022}, url = {http://arxiv.org/abs/2212.00253v1} } @article{1709.05067, title = {Deep Reinforcement Learning for Conversational AI}, author = {Mahipal Jadeja , Neelanshi Varia , Agam Shah}, journal={arXiv preprint arXiv:1709.05067}, year = {2017}, url = {http://arxiv.org/abs/1709.05067v1} } @article{1708.05866, title = {A Brief Survey of Deep Reinforcement Learning}, author = {Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath}, journal={arXiv preprint arXiv:1708.05866}, year = {2017}, url = {http://arxiv.org/abs/1708.05866v2} } @article{1906.10025, title = {Modern Deep Reinforcement Learning Algorithms}, author = {Sergey Ivanov , Alexander D'yakonov}, journal={arXiv preprint arXiv:1906.10025}, year = {2019}, url = {http://arxiv.org/abs/1906.10025v2} } @article{2111.01334, title = {Measuring and utilizing temporal network dissimilarity}, author = {Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang}, journal={arXiv preprint arXiv:2111.01334}, year = {2021}, url = {http://arxiv.org/abs/2111.01334v1} } @article{1512.07669, title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov Decision Processes}, author = {Vikram Krishnamurthy}, journal={arXiv preprint arXiv:1512.07669}, year = {2015}, url = {http://arxiv.org/abs/1512.07669v1} } @article{1511.02377, title = {The Value Functions of Markov Decision Processes}, author = {Ehud Lehrer , Eilon Solan , Omri N. Solan}, journal={arXiv preprint arXiv:1511.02377}, year = {2015}, url = {http://arxiv.org/abs/1511.02377v1} } @article{1512.09075, title = {A Notation for Markov Decision Processes}, author = {Philip S. 
Thomas , Billy Okal}, journal={arXiv preprint arXiv:1512.09075}, year = {2015}, url = {http://arxiv.org/abs/1512.09075v2} } @article{2008.10426, title = {Taming denumerable Markov decision processes with decisiveness}, author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier}, journal={arXiv preprint arXiv:2008.10426}, year = {2020}, url = {http://arxiv.org/abs/2008.10426v1} } @article{0711.2185, title = {Exact finite approximations of average-cost countable Markov Decision Processes}, author = {Arie Leizarowitz , Adam Shwartz}, journal={arXiv preprint arXiv:0711.2185}, year = {2007}, url = {http://arxiv.org/abs/0711.2185v1} } @article{2303.08631, title = {Smoothed Q-learning}, author = {David Barber}, journal={arXiv preprint arXiv:2303.08631}, year = {2023}, url = {http://arxiv.org/abs/2303.08631v1} } @article{2106.14642, title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples}, author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad}, journal={arXiv preprint arXiv:2106.14642}, year = {2021}, url = {http://arxiv.org/abs/2106.14642v3} } @article{2106.01134, title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity}, author = {Wei Liao , Xiaohui Wei , Jizhou Lai}, journal={arXiv preprint arXiv:2106.01134}, year = {2021}, url = {http://arxiv.org/abs/2106.01134v1} } @article{2012.01100, title = {Self-correcting Q-Learning}, author = {Rong Zhu , Mattia Rigotti}, journal={arXiv preprint arXiv:2012.01100}, year = {2020}, url = {http://arxiv.org/abs/2012.01100v2} } @article{1703.02102, title = {Revisiting stochastic off-policy action-value gradients}, author = {Yemi Okesanjo , Victor Kofia}, journal={arXiv preprint arXiv:1703.02102}, year = {2017}, url = {http://arxiv.org/abs/1703.02102v2} } @article{2209.01820, title = {Natural Policy Gradients In Reinforcement Learning Explained}, author = {W. J. A. 
van Heeswijk}, journal={arXiv preprint arXiv:2209.01820}, year = {2022}, url = {http://arxiv.org/abs/2209.01820v1} } @article{1811.09013, title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings}, author = {Ehsan Imani , Eric Graves , Martha White}, journal={arXiv preprint arXiv:1811.09013}, year = {2018}, url = {http://arxiv.org/abs/1811.09013v2} } @article{1911.04817, title = {On Policy Gradients}, author = {Mattis Manfred Kämmerer}, journal={arXiv preprint arXiv:1911.04817}, year = {2019}, url = {http://arxiv.org/abs/1911.04817v1} } @article{2108.11510, title = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey}, author = {Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides}, journal={arXiv preprint arXiv:2108.11510}, year = {2021}, url = {http://arxiv.org/abs/2108.11510v1} } @article{2212.00253, title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox}, author = {Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang}, journal={arXiv preprint arXiv:2212.00253}, year = {2022}, url = {http://arxiv.org/abs/2212.00253v1} } @article{1709.05067, title = {Deep Reinforcement Learning for Conversational AI}, author = {Mahipal Jadeja , Neelanshi Varia , Agam Shah}, journal={arXiv preprint arXiv:1709.05067}, year = {2017}, url = {http://arxiv.org/abs/1709.05067v1} } @article{1708.05866, title = {A Brief Survey of Deep Reinforcement Learning}, author = {Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath}, journal={arXiv preprint arXiv:1708.05866}, year = {2017}, url = {http://arxiv.org/abs/1708.05866v2} } @article{1906.10025, title = {Modern Deep Reinforcement Learning Algorithms}, author = {Sergey Ivanov , Alexander D'yakonov}, journal={arXiv preprint arXiv:1906.10025}, year = {2019}, url = {http://arxiv.org/abs/1906.10025v2} } @article{2111.01334, title = {Measuring and utilizing 
temporal network dissimilarity}, author = {Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang}, journal={arXiv preprint arXiv:2111.01334}, year = {2021}, url = {http://arxiv.org/abs/2111.01334v1} } @article{2110.06553, title = {Spatial-temporal Transformers for EEG Emotion Recognition}, author = {Jiyao Liu , Hao Wu , Li Zhang , Yanxi Zhao}, journal={arXiv preprint arXiv:2110.06553}, year = {2021}, url = {http://arxiv.org/abs/2110.06553v2} } @article{1512.07669, title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov Decision Processes}, author = {Vikram Krishnamurthy}, journal={arXiv preprint arXiv:1512.07669}, year = {2015}, url = {http://arxiv.org/abs/1512.07669v1} } @article{1511.02377, title = {The Value Functions of Markov Decision Processes}, author = {Ehud Lehrer , Eilon Solan , Omri N. Solan}, journal={arXiv preprint arXiv:1511.02377}, year = {2015}, url = {http://arxiv.org/abs/1511.02377v1} } @article{1512.09075, title = {A Notation for Markov Decision Processes}, author = {Philip S. 
Thomas , Billy Okal}, journal={arXiv preprint arXiv:1512.09075}, year = {2015}, url = {http://arxiv.org/abs/1512.09075v2} } @article{2008.10426, title = {Taming denumerable Markov decision processes with decisiveness}, author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier}, journal={arXiv preprint arXiv:2008.10426}, year = {2020}, url = {http://arxiv.org/abs/2008.10426v1} } @article{0711.2185, title = {Exact finite approximations of average-cost countable Markov Decision Processes}, author = {Arie Leizarowitz , Adam Shwartz}, journal={arXiv preprint arXiv:0711.2185}, year = {2007}, url = {http://arxiv.org/abs/0711.2185v1} } @article{2303.08631, title = {Smoothed Q-learning}, author = {David Barber}, journal={arXiv preprint arXiv:2303.08631}, year = {2023}, url = {http://arxiv.org/abs/2303.08631v1} } @article{2106.14642, title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples}, author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad}, journal={arXiv preprint arXiv:2106.14642}, year = {2021}, url = {http://arxiv.org/abs/2106.14642v3} } @article{2106.01134, title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity}, author = {Wei Liao , Xiaohui Wei , Jizhou Lai}, journal={arXiv preprint arXiv:2106.01134}, year = {2021}, url = {http://arxiv.org/abs/2106.01134v1} } @article{2012.01100, title = {Self-correcting Q-Learning}, author = {Rong Zhu , Mattia Rigotti}, journal={arXiv preprint arXiv:2012.01100}, year = {2020}, url = {http://arxiv.org/abs/2012.01100v2} } @article{1703.02102, title = {Revisiting stochastic off-policy action-value gradients}, author = {Yemi Okesanjo , Victor Kofia}, journal={arXiv preprint arXiv:1703.02102}, year = {2017}, url = {http://arxiv.org/abs/1703.02102v2} } @article{2209.01820, title = {Natural Policy Gradients In Reinforcement Learning Explained}, author = {W. J. A. 
van Heeswijk}, journal={arXiv preprint arXiv:2209.01820}, year = {2022}, url = {http://arxiv.org/abs/2209.01820v1} } @article{1811.09013, title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings}, author = {Ehsan Imani , Eric Graves , Martha White}, journal={arXiv preprint arXiv:1811.09013}, year = {2018}, url = {http://arxiv.org/abs/1811.09013v2} } @article{1911.04817, title = {On Policy Gradients}, author = {Mattis Manfred Kämmerer}, journal={arXiv preprint arXiv:1911.04817}, year = {2019}, url = {http://arxiv.org/abs/1911.04817v1} } @article{2108.11510, title = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey}, author = {Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides}, journal={arXiv preprint arXiv:2108.11510}, year = {2021}, url = {http://arxiv.org/abs/2108.11510v1} } @article{2212.00253, title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox}, author = {Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang}, journal={arXiv preprint arXiv:2212.00253}, year = {2022}, url = {http://arxiv.org/abs/2212.00253v1} } @article{1709.05067, title = {Deep Reinforcement Learning for Conversational AI}, author = {Mahipal Jadeja , Neelanshi Varia , Agam Shah}, journal={arXiv preprint arXiv:1709.05067}, year = {2017}, url = {http://arxiv.org/abs/1709.05067v1} } @article{1708.05866, title = {A Brief Survey of Deep Reinforcement Learning}, author = {Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath}, journal={arXiv preprint arXiv:1708.05866}, year = {2017}, url = {http://arxiv.org/abs/1708.05866v2} } @article{1906.10025, title = {Modern Deep Reinforcement Learning Algorithms}, author = {Sergey Ivanov , Alexander D'yakonov}, journal={arXiv preprint arXiv:1906.10025}, year = {2019}, url = {http://arxiv.org/abs/1906.10025v2} } @article{2111.01334, title = {Measuring and utilizing 
temporal network dissimilarity}, author = {Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang}, journal={arXiv preprint arXiv:2111.01334}, year = {2021}, url = {http://arxiv.org/abs/2111.01334v1} } @article{2110.06553, title = {Spatial-temporal Transformers for EEG Emotion Recognition}, author = {Jiyao Liu , Hao Wu , Li Zhang , Yanxi Zhao}, journal={arXiv preprint arXiv:2110.06553}, year = {2021}, url = {http://arxiv.org/abs/2110.06553v2} } @article{2007.04828, title = {Predictability of real temporal networks}, author = {Disheng Tang , Wenbo Du , Louis Shekhtman , Yijie Wang , Shlomo Havlin , Xianbin Cao , Gang Yan}, journal={arXiv preprint arXiv:2007.04828}, year = {2020}, url = {http://arxiv.org/abs/2007.04828v1} }