@comment{References on Markov decision processes, Q-learning, and policy-gradient methods.}
@misc{1512.07669,
  title         = {Reinforcement Learning: Stochastic Approximation Algorithms for {Markov} Decision Processes},
  author        = {Krishnamurthy, Vikram},
  year          = {2015},
  eprint        = {1512.07669},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1512.07669v1},
}
@comment{Removed a duplicate entry for key 1512.07669; the work is defined once above.}
@misc{1511.02377,
  title         = {The Value Functions of {Markov} Decision Processes},
  author        = {Lehrer, Ehud and Solan, Eilon and Solan, Omri N.},
  year          = {2015},
  eprint        = {1511.02377},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1511.02377v1},
}
@comment{Removed duplicate entries for keys 1512.07669 and 1511.02377; each work is defined once above.}
@misc{1512.09075,
  title         = {A Notation for {Markov} Decision Processes},
  author        = {Thomas, Philip S. and Okal, Billy},
  year          = {2015},
  eprint        = {1512.09075},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1512.09075v2},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, and 1512.09075; each work is defined once above.}
@misc{2008.10426,
  title         = {Taming denumerable {Markov} decision processes with decisiveness},
  author        = {Bertrand, Nathalie and Bouyer, Patricia and Brihaye, Thomas and Fournier, Paulin},
  year          = {2020},
  eprint        = {2008.10426},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2008.10426v1},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, and 2008.10426; each work is defined once above.}
@misc{0711.2185,
  title         = {Exact finite approximations of average-cost countable {Markov} Decision Processes},
  author        = {Leizarowitz, Arie and Shwartz, Adam},
  year          = {2007},
  eprint        = {0711.2185},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/0711.2185v1},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, and 0711.2185; each work is defined once above.}
@misc{2303.08631,
  title         = {Smoothed {Q-learning}},
  author        = {Barber, David},
  year          = {2023},
  eprint        = {2303.08631},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2303.08631v1},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, and 2303.08631; each work is defined once above.}
@misc{2106.14642,
  title         = {Expert {Q-learning}: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples},
  author        = {Meng, Li and Yazidi, Anis and Goodwin, Morten and Engelstad, Paal},
  year          = {2021},
  eprint        = {2106.14642},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2106.14642v3},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, 2303.08631, and 2106.14642; each work is defined once above.}
@misc{2106.01134,
  title         = {Smooth {Q-learning}: Accelerate Convergence of {Q-learning} Using Similarity},
  author        = {Liao, Wei and Wei, Xiaohui and Lai, Jizhou},
  year          = {2021},
  eprint        = {2106.01134},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2106.01134v1},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, 2303.08631, 2106.14642, and 2106.01134; each work is defined once above.}
@misc{2012.01100,
  title         = {Self-correcting {Q-Learning}},
  author        = {Zhu, Rong and Rigotti, Mattia},
  year          = {2020},
  eprint        = {2012.01100},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2012.01100v2},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, and 2012.01100; each work is defined once above.}
@misc{1703.02102,
  title         = {Revisiting stochastic off-policy action-value gradients},
  author        = {Okesanjo, Yemi and Kofia, Victor},
  year          = {2017},
  eprint        = {1703.02102},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1703.02102v2},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100, and 1703.02102; each work is defined once above.}
@misc{2209.01820,
  title         = {Natural Policy Gradients In Reinforcement Learning Explained},
  author        = {van Heeswijk, W. J. A.},
  year          = {2022},
  eprint        = {2209.01820},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2209.01820v1},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100, 1703.02102, and 2209.01820; each work is defined once above.}
@misc{1811.09013,
  title         = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
  author        = {Imani, Ehsan and Graves, Eric and White, Martha},
  year          = {2018},
  eprint        = {1811.09013},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1811.09013v2},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100, 1703.02102, 2209.01820, and 1811.09013; each work is defined once above.}
@misc{1911.04817,
  title         = {On Policy Gradients},
  author        = {K{\"a}mmerer, Mattis Manfred},
  year          = {2019},
  eprint        = {1911.04817},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1911.04817v1},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100, 1703.02102, 2209.01820, 1811.09013, and 1911.04817; each work is defined once above.}
@misc{2108.11510,
  title         = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey},
  author        = {Le, Ngan and Rathour, Vidhiwar Singh and Yamazaki, Kashu and Luu, Khoa and Savvides, Marios},
  year          = {2021},
  eprint        = {2108.11510},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2108.11510v1},
}
@comment{Removed duplicate entries for keys 1512.07669, 1511.02377, 1512.09075, 2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100, 1703.02102, 2209.01820, 1811.09013, 1911.04817, and 2108.11510; each work is defined once above.}
@comment{arXiv:2212.00253 — author list converted from comma-separated names
  (which BibTeX mis-parses as a single mangled name) to " and "-separated
  "Last, First" form; eprint/archiveprefix added for biblatex-aware styles.}
@article{2212.00253,
  title         = {Distributed Deep Reinforcement Learning: A Survey and A
                   Multi-Player Multi-Agent Learning Toolbox},
  author        = {Yin, Qiyue and Yu, Tongtong and Shen, Shengqi and Yang, Jun
                   and Zhao, Meijing and Huang, Kaiqi and Liang, Bin and
                   Wang, Liang},
  journal       = {arXiv preprint arXiv:2212.00253},
  year          = {2022},
  eprint        = {2212.00253},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2212.00253v1}
}
@comment{Removed 15 duplicate entries (keys: 1512.07669, 1511.02377, 1512.09075,
  2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100,
  1703.02102, 2209.01820, 1811.09013, 1911.04817, 2108.11510, 2212.00253).
  Identical copies of each entry appear elsewhere in this file; BibTeX reports
  "repeated entry" for keys defined more than once and ignores the later copies.}
@comment{arXiv:1709.05067 — author list converted to " and "-separated
  "Last, First" form; {AI} braced so sentence-casing styles keep the acronym;
  eprint/archiveprefix added for biblatex-aware styles.}
@article{1709.05067,
  title         = {Deep Reinforcement Learning for Conversational {AI}},
  author        = {Jadeja, Mahipal and Varia, Neelanshi and Shah, Agam},
  journal       = {arXiv preprint arXiv:1709.05067},
  year          = {2017},
  eprint        = {1709.05067},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1709.05067v1}
}
@comment{Removed 16 duplicate entries (keys: 1512.07669, 1511.02377, 1512.09075,
  2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100,
  1703.02102, 2209.01820, 1811.09013, 1911.04817, 2108.11510, 2212.00253,
  1709.05067). Identical copies of each entry appear elsewhere in this file;
  BibTeX reports "repeated entry" for keys defined more than once and ignores
  the later copies.}
@comment{arXiv:1708.05866 — author list converted to " and "-separated
  "Last, First" form; eprint/archiveprefix added for biblatex-aware styles.}
@article{1708.05866,
  title         = {A Brief Survey of Deep Reinforcement Learning},
  author        = {Arulkumaran, Kai and Deisenroth, Marc Peter and
                   Brundage, Miles and Bharath, Anil Anthony},
  journal       = {arXiv preprint arXiv:1708.05866},
  year          = {2017},
  eprint        = {1708.05866},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1708.05866v2}
}
@comment{Removed 17 duplicate entries (keys: 1512.07669, 1511.02377, 1512.09075,
  2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100,
  1703.02102, 2209.01820, 1811.09013, 1911.04817, 2108.11510, 2212.00253,
  1709.05067, 1708.05866). Identical copies of each entry appear elsewhere in
  this file; BibTeX reports "repeated entry" for keys defined more than once
  and ignores the later copies.}
@comment{arXiv:1906.10025 — author list converted to " and "-separated
  "Last, First" form; eprint/archiveprefix added for biblatex-aware styles.}
@article{1906.10025,
  title         = {Modern Deep Reinforcement Learning Algorithms},
  author        = {Ivanov, Sergey and D'yakonov, Alexander},
  journal       = {arXiv preprint arXiv:1906.10025},
  year          = {2019},
  eprint        = {1906.10025},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1906.10025v2}
}
@comment{Removed 18 duplicate entries (keys: 1512.07669, 1511.02377, 1512.09075,
  2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100,
  1703.02102, 2209.01820, 1811.09013, 1911.04817, 2108.11510, 2212.00253,
  1709.05067, 1708.05866, 1906.10025). Identical copies of each entry appear
  elsewhere in this file; BibTeX reports "repeated entry" for keys defined more
  than once and ignores the later copies.}
@comment{arXiv:2111.01334 — author list converted to " and "-separated
  "Last, First" form; eprint/archiveprefix added for biblatex-aware styles.
  NOTE(review): "Huijuang Wang" kept as-is from the original data — possibly a
  typo for "Huijuan Wang"; verify against the arXiv record.}
@article{2111.01334,
  title         = {Measuring and utilizing temporal network dissimilarity},
  author        = {Zhan, Xiu-Xiu and Liu, Chuang and Wang, Zhipeng and
                   Wang, Huijuang and Holme, Petter and Zhang, Zi-Ke},
  journal       = {arXiv preprint arXiv:2111.01334},
  year          = {2021},
  eprint        = {2111.01334},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2111.01334v1}
}
@comment{Removed 19 duplicate entries (keys: 1512.07669, 1511.02377, 1512.09075,
  2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100,
  1703.02102, 2209.01820, 1811.09013, 1911.04817, 2108.11510, 2212.00253,
  1709.05067, 1708.05866, 1906.10025, 2111.01334). Identical copies of each
  entry appear elsewhere in this file; BibTeX reports "repeated entry" for keys
  defined more than once and ignores the later copies.}
@comment{arXiv:2110.06553 — author list converted to " and "-separated
  "Last, First" form; {EEG} braced so sentence-casing styles keep the acronym;
  eprint/archiveprefix added for biblatex-aware styles.}
@article{2110.06553,
  title         = {Spatial-temporal Transformers for {EEG} Emotion Recognition},
  author        = {Liu, Jiyao and Wu, Hao and Zhang, Li and Zhao, Yanxi},
  journal       = {arXiv preprint arXiv:2110.06553},
  year          = {2021},
  eprint        = {2110.06553},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2110.06553v2}
}
@comment{Removed 20 duplicate entries (keys: 1512.07669, 1511.02377, 1512.09075,
  2008.10426, 0711.2185, 2303.08631, 2106.14642, 2106.01134, 2012.01100,
  1703.02102, 2209.01820, 1811.09013, 1911.04817, 2108.11510, 2212.00253,
  1709.05067, 1708.05866, 1906.10025, 2111.01334, 2110.06553). Identical copies
  of each entry appear elsewhere in this file; BibTeX reports "repeated entry"
  for keys defined more than once and ignores the later copies.}
@comment{arXiv:2007.04828 — author list converted to " and "-separated
  "Last, First" form; eprint/archiveprefix added for biblatex-aware styles.}
@article{2007.04828,
  title         = {Predictability of real temporal networks},
  author        = {Tang, Disheng and Du, Wenbo and Shekhtman, Louis and
                   Wang, Yijie and Havlin, Shlomo and Cao, Xianbin and
                   Yan, Gang},
  journal       = {arXiv preprint arXiv:2007.04828},
  year          = {2020},
  eprint        = {2007.04828},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2007.04828v1}
}