\relax \providecommand\hyper@newdestlabel[2]{} \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined \global\let\oldcontentsline\contentsline \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} \global\let\oldnewlabel\newlabel \gdef\newlabel#1#2{\newlabelxx{#1}#2} \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} \AtEndDocument{\ifx\hyper@anchor\@undefined \let\contentsline\oldcontentsline \let\newlabel\oldnewlabel \fi} \fi} \global\let\hyper@last\relax \gdef\HyperFirstAtBeginDocument#1{#1} \providecommand\HyField@AuxAddToFields[1]{} \providecommand\HyField@AuxAddToCoFields[2]{} \citation{2108.11510} \citation{1708.05866} \citation{1906.10025} \citation{2303.08631} \citation{2106.14642} \citation{1511.02377} \citation{2012.01100} \citation{1709.05067} \citation{1708.05866} \citation{1906.10025} \citation{2212.00253} \citation{2106.14642} \citation{1811.09013} \citation{2209.01820} \citation{1911.04817} \citation{1512.07669} \citation{1511.02377} \citation{1512.09075} \citation{2008.10426} \citation{0711.2185} \@writefile{toc}{\contentsline {section}{\numberline {1}introduction}{1}{section.1}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {2}related works}{1}{section.2}\protected@file@percent } \@writefile{toc}{\contentsline {paragraph}{Markov Decision Processes:}{1}{section*.1}\protected@file@percent } \citation{2303.08631} \citation{2303.08631} \citation{2012.01100} \citation{2106.14642} \citation{2209.01820} \citation{1811.09013} \citation{2108.11510} \citation{1708.05866} \citation{1906.10025} \citation{2111.01334} \citation{1512.09075} \citation{1511.02377} \citation{1512.07669} \@writefile{toc}{\contentsline {paragraph}{Q-Learning and Variants:}{2}{section*.2}\protected@file@percent } \@writefile{toc}{\contentsline {paragraph}{Expert Q-Learning:}{2}{section*.3}\protected@file@percent } \@writefile{toc}{\contentsline {paragraph}{Policy Gradient Methods:}{2}{section*.4}\protected@file@percent } \@writefile{toc}{\contentsline {paragraph}{Deep Reinforcement Learning:}{2}{section*.5}\protected@file@percent } \@writefile{toc}{\contentsline {paragraph}{Temporal Networks:}{2}{section*.6}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {3}backgrounds}{2}{section.3}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Problem Statement and Foundational Concepts}{2}{subsection.3.1}\protected@file@percent } \citation{2303.08631} \citation{2303.08631} \citation{2106.14642} \citation{2303.08631} \citation{2106.14642} \citation{1703.02102} \citation{1811.09013} \citation{2209.01820} \bibdata{ref} \bibcite{0711.2185}{{1}{2007}{{Arie~Leizarowitz}}{{}}} \bibcite{2303.08631}{{2}{2023}{{Barber}}{{}}} \bibcite{1811.09013}{{3}{2018}{{Ehsan~Imani}}{{}}} \bibcite{1511.02377}{{4}{2015}{{Ehud~Lehrer}}{{}}} \bibcite{1708.05866}{{5}{2017}{{Kai~Arulkumaran}}{{}}} \bibcite{1512.07669}{{6}{2015}{{Krishnamurthy}}{{}}} \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Q-Learning and Related Algorithms}{3}{subsection.3.2}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Policy Gradient Methods}{3}{subsection.3.3}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Methodology and Evaluation Metrics}{3}{subsection.3.4}\protected@file@percent } \bibcite{1911.04817}{{7}{2019}{{Kämmerer}}{{}}} \bibcite{2106.14642}{{8}{2021}{{Li~Meng}}{{}}} \bibcite{1709.05067}{{9}{2017}{{Mahipal~Jadeja}}{{}}} \bibcite{2008.10426}{{10}{2020}{{Nathalie~Bertrand}}{{}}} \bibcite{2108.11510}{{11}{2021}{{Ngan~Le}}{{}}} \bibcite{1512.09075}{{12}{2015}{{Philip S.~Thomas}}{{}}} \bibcite{2212.00253}{{13}{2022}{{Qiyue~Yin}}{{}}} \bibcite{2012.01100}{{14}{2020}{{Rong~Zhu}}{{}}} \bibcite{1906.10025}{{15}{2019}{{Sergey~Ivanov}}{{}}} \bibcite{2209.01820}{{16}{2022}{{van Heeswijk}}{{}}} \bibcite{2111.01334}{{17}{2021}{{Xiu-Xiu~Zhan}}{{}}} \bibcite{1703.02102}{{18}{2017}{{Yemi~Okesanjo}}{{}}} \bibstyle{iclr2022_conference}