\relax 
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldcontentsline\contentsline
\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\contentsline\oldcontentsline
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax 
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\citation{2108.11510}
\citation{1708.05866}
\citation{1906.10025}
\citation{2303.08631}
\citation{2106.14642}
\citation{1511.02377}
\citation{2012.01100}
\citation{1709.05067}
\citation{1708.05866}
\citation{1906.10025}
\citation{2212.00253}
\citation{2106.14642}
\citation{1811.09013}
\citation{2209.01820}
\citation{1911.04817}
\citation{1512.07669}
\citation{1511.02377}
\citation{1512.09075}
\citation{2008.10426}
\citation{0711.2185}
\@writefile{toc}{\contentsline {section}{\numberline {1}introduction}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}related works}{1}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{Markov Decision Processes:}{1}{section*.1}\protected@file@percent }
\citation{2303.08631}
\citation{2303.08631}
\citation{2012.01100}
\citation{2106.14642}
\citation{2209.01820}
\citation{1811.09013}
\citation{2108.11510}
\citation{1708.05866}
\citation{1906.10025}
\citation{2111.01334}
\citation{1512.09075}
\citation{1511.02377}
\citation{1512.07669}
\@writefile{toc}{\contentsline {paragraph}{Q-Learning and Variants:}{2}{section*.2}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{Expert Q-Learning:}{2}{section*.3}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{Policy Gradient Methods:}{2}{section*.4}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{Deep Reinforcement Learning:}{2}{section*.5}\protected@file@percent }
\@writefile{toc}{\contentsline {paragraph}{Temporal Networks:}{2}{section*.6}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}backgrounds}{2}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Problem Statement and Foundational Concepts}{2}{subsection.3.1}\protected@file@percent }
\citation{2303.08631}
\citation{2303.08631}
\citation{2106.14642}
\citation{2303.08631}
\citation{2106.14642}
\citation{1703.02102}
\citation{1811.09013}
\citation{2209.01820}
\bibdata{ref}
\bibcite{0711.2185}{{1}{2007}{{Arie~Leizarowitz}}{{}}}
\bibcite{2303.08631}{{2}{2023}{{Barber}}{{}}}
\bibcite{1811.09013}{{3}{2018}{{Ehsan~Imani}}{{}}}
\bibcite{1511.02377}{{4}{2015}{{Ehud~Lehrer}}{{}}}
\bibcite{1708.05866}{{5}{2017}{{Kai~Arulkumaran}}{{}}}
\bibcite{1512.07669}{{6}{2015}{{Krishnamurthy}}{{}}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Q-Learning and Related Algorithms}{3}{subsection.3.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Policy Gradient Methods}{3}{subsection.3.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Methodology and Evaluation Metrics}{3}{subsection.3.4}\protected@file@percent }
\bibcite{1911.04817}{{7}{2019}{{Kämmerer}}{{}}}
\bibcite{2106.14642}{{8}{2021}{{Li~Meng}}{{}}}
\bibcite{1709.05067}{{9}{2017}{{Mahipal~Jadeja}}{{}}}
\bibcite{2008.10426}{{10}{2020}{{Nathalie~Bertrand}}{{}}}
\bibcite{2108.11510}{{11}{2021}{{Ngan~Le}}{{}}}
\bibcite{1512.09075}{{12}{2015}{{Philip S.~Thomas}}{{}}}
\bibcite{2212.00253}{{13}{2022}{{Qiyue~Yin}}{{}}}
\bibcite{2012.01100}{{14}{2020}{{Rong~Zhu}}{{}}}
\bibcite{1906.10025}{{15}{2019}{{Sergey~Ivanov}}{{}}}
\bibcite{2209.01820}{{16}{2022}{{van Heeswijk}}{{}}}
\bibcite{2111.01334}{{17}{2021}{{Xiu-Xiu~Zhan}}{{}}}
\bibcite{1703.02102}{{18}{2017}{{Yemi~Okesanjo}}{{}}}
\bibstyle{iclr2022_conference}