diff --git a/app.py b/app.py index 81c0846e9a1787357052386529c0614b69f0b195..081cd8b0310f5a99da70b5cfd1d889d5a4ec0641 100644 --- a/app.py +++ b/app.py @@ -2,7 +2,7 @@ import gradio as gr import os import openai from auto_backgrounds import generate_backgrounds, generate_draft -from utils.file_operations import hash_name +from utils.file_operations import hash_name, list_folders from references_generator import generate_top_k_references # todo: @@ -42,6 +42,8 @@ else: except Exception as e: IS_OPENAI_API_KEY_AVAILABLE = False +ALL_TEMPLATES = list_folders("latex_templates") + def clear_inputs(*args): return "", "" @@ -108,7 +110,7 @@ theme = gr.themes.Default(font=gr.themes.GoogleFont("Questrial")) ACADEMIC_PAPER = """## 一键生成论文初稿 1. 在Title文本框中输入想要生成的论文名称(比如Playing Atari with Deep Reinforcement Learning). -2. 点击Submit. 等待大概十分钟. +2. 点击Submit. 等待大概十五分钟(全文). 3. 在右侧下载.zip格式的输出,在Overleaf上编译浏览. """ @@ -146,6 +148,10 @@ with gr.Blocks(theme=theme) as demo: 本Demo提供对[Auto-Draft](https://github.com/CCCBora/auto-draft)的auto_draft功能的测试. 通过输入想要生成的论文名称(比如Playing atari with deep reinforcement learning),即可由AI辅助生成论文模板. + ***2023-06-08 Update***: + * 目前对英文的生成效果更好. 如果需要中文文章可以使用[GPT学术优化](https://github.com/binary-husky/gpt_academic)的`Latex全文翻译、润色`功能. + * 支持 + ***2023-05-17 Update***: 我的API的余额用完了, 所以这个月不再能提供GPT-4的API Key. 这里为大家提供了一个位置输入OpenAI API Key. 同时也提供了GPT-3.5的兼容. 欢迎大家自行体验. 如果有更多想法和建议欢迎加入QQ群里交流, 如果我在Space里更新了Key我会第一时间通知大家. 群号: ***249738228***. @@ -170,9 +176,9 @@ with gr.Blocks(theme=theme) as demo: description_pp = gr.Textbox(lines=5, label="Description (Optional)", visible=True, info="对希望生成的论文的一些描述. 包括这篇论文的创新点, 主要贡献, 等.") with gr.Row(): - template = gr.Dropdown(label="Template", choices=["ICLR2022"], value="ICLR2022", - interactive=False, - info="生成论文的参考模板. (暂不支持修改)") + template = gr.Dropdown(label="Template", choices=ALL_TEMPLATES, value="Default", + interactive=True, + info="生成论文的参考模板.") model_selection = gr.Dropdown(label="Model", choices=["gpt-4", "gpt-3.5-turbo"], value="gpt-3.5-turbo", interactive=True, @@ -202,10 +208,6 @@ with gr.Blocks(theme=theme) as demo: ''') bibtex_file = gr.File(label="Upload .bib file", file_types=["text"], interactive=True) - gr.Examples( - examples=["latex_templates/example_references.bib"], - inputs=bibtex_file - ) with gr.Row(): with gr.Column(scale=1): diff --git a/latex_templates/Summary/abstract.tex b/latex_templates/Default/abstract.tex similarity index 100% rename from latex_templates/Summary/abstract.tex rename to latex_templates/Default/abstract.tex diff --git a/latex_templates/Summary/backgrounds.tex b/latex_templates/Default/backgrounds.tex similarity index 100% rename from latex_templates/Summary/backgrounds.tex rename to latex_templates/Default/backgrounds.tex diff --git a/latex_templates/Summary/conclusion.tex b/latex_templates/Default/conclusion.tex similarity index 100% rename from latex_templates/Summary/conclusion.tex rename to latex_templates/Default/conclusion.tex diff --git a/latex_templates/Summary/experiments.tex b/latex_templates/Default/experiments.tex similarity index 100% rename from latex_templates/Summary/experiments.tex rename to latex_templates/Default/experiments.tex diff --git a/latex_templates/Summary/fancyhdr.sty b/latex_templates/Default/fancyhdr.sty similarity index 100% rename from latex_templates/Summary/fancyhdr.sty rename to latex_templates/Default/fancyhdr.sty diff --git a/latex_templates/Default/fig.png b/latex_templates/Default/fig.png new file mode 100644 index 0000000000000000000000000000000000000000..61141907f9164cf762226c43c05a7d681212a767 Binary files /dev/null and b/latex_templates/Default/fig.png differ diff --git a/latex_templates/Summary/iclr2022_conference.bst b/latex_templates/Default/iclr2022_conference.bst similarity index 100% rename from latex_templates/Summary/iclr2022_conference.bst rename to latex_templates/Default/iclr2022_conference.bst diff --git a/latex_templates/Summary/iclr2022_conference.sty b/latex_templates/Default/iclr2022_conference.sty similarity index 100% rename from latex_templates/Summary/iclr2022_conference.sty rename to latex_templates/Default/iclr2022_conference.sty diff --git a/latex_templates/Summary/introduction.tex b/latex_templates/Default/introduction.tex similarity index 100% rename from latex_templates/Summary/introduction.tex rename to latex_templates/Default/introduction.tex diff --git a/latex_templates/Summary/math_commands.tex b/latex_templates/Default/math_commands.tex similarity index 100% rename from latex_templates/Summary/math_commands.tex rename to latex_templates/Default/math_commands.tex diff --git a/latex_templates/Summary/methodology.tex b/latex_templates/Default/methodology.tex similarity index 100% rename from latex_templates/Summary/methodology.tex rename to latex_templates/Default/methodology.tex diff --git a/latex_templates/Summary/natbib.sty b/latex_templates/Default/natbib.sty similarity index 100% rename from latex_templates/Summary/natbib.sty rename to latex_templates/Default/natbib.sty diff --git a/latex_templates/Summary/related works.tex b/latex_templates/Default/related works.tex similarity index 100% rename from latex_templates/Summary/related works.tex rename to latex_templates/Default/related works.tex diff --git a/outputs/outputs_20230421_000752/template.tex b/latex_templates/Default/template.tex similarity index 76% rename from outputs/outputs_20230421_000752/template.tex rename to latex_templates/Default/template.tex index a114fc331c078fd4ad0f2a1e80bf7f74241be4d5..95d371d30951b1723ad55b014c7361b31feefa79 100644 --- a/outputs/outputs_20230421_000752/template.tex +++ b/latex_templates/Default/template.tex @@ -2,11 +2,17 @@ \UseRawInputEncoding \usepackage{graphicx} \usepackage{booktabs} -\usepackage{iclr2022_conference, times} +\usepackage{times} +\usepackage{eso-pic} % used by \AddToShipoutPicture +\RequirePackage{fancyhdr} +\RequirePackage{natbib} +\usepackage{fullpage} + \input{math_commands.tex} \usepackage{hyperref} \usepackage{url} -\usepackage{algorithmicx} +\usepackage{algorithm} +\usepackage{algpseudocode} \title{TITLE} \author{GPT-4} diff --git a/latex_templates/Summary/template.tex b/latex_templates/Summary/template.tex deleted file mode 100644 index 1dbd71f34f0bc6660ea48591f65e45492b4e78bb..0000000000000000000000000000000000000000 --- a/latex_templates/Summary/template.tex +++ /dev/null @@ -1,33 +0,0 @@ -\documentclass{article} % For LaTeX2e -\UseRawInputEncoding -\usepackage{graphicx} -\usepackage{booktabs} -\input{math_commands.tex} -\usepackage{hyperref} -\usepackage{url} -\usepackage{algorithmicx} - -\title{TITLE} -\author{GPT-4} - -\newcommand{\fix}{\marginpar{FIX}} -\newcommand{\new}{\marginpar{NEW}} - -\begin{document} -\maketitle -\input{abstract.tex} -\input{introduction.tex} -\input{related works.tex} -\input{backgrounds.tex} -\input{methodology.tex} -\input{experiments.tex} -\input{conclusion.tex} - -\bibliography{ref} -\bibliographystyle{abbrv} - -%\appendix -%\section{Appendix} -%You may include other additional sections here. - -\end{document} diff --git a/latex_templates/example_references.bib b/latex_templates/example_references.bib deleted file mode 100644 index d5f5682348c09273888e3221361dd6da903304a2..0000000000000000000000000000000000000000 --- a/latex_templates/example_references.bib +++ /dev/null @@ -1,9 +0,0 @@ -@inproceedings{ma2020understanding, - title={Understanding the impact of model incoherence on convergence of incremental sgd with random reshuffle}, - author={Ma, Shaocong and Zhou, Yi}, - booktitle={International Conference on Machine Learning}, - pages={6565--6574}, - year={2020}, - organization={PMLR}, - abstract={Although SGD with random reshuffle has been widely-used in machine learning applications, there is a limited understanding of how model characteristics affect the convergence of the algorithm. In this work, we introduce model incoherence to characterize the diversity of model characteristics and study its impact on convergence of SGD with random reshuffle under weak strong convexity. Specifically, minimizer incoherence measures the discrepancy between the global minimizers of a sample loss and those of the total loss and affects the convergence error of SGD with random reshuffle. In particular, we show that the variable sequence generated by SGD with random reshuffle converges to a certain global minimizer of the total loss under full minimizer coherence. The other curvature incoherence measures the quality of condition numbers of the sample losses and determines the convergence rate of SGD. With model incoherence, our results show that SGD has a faster convergence rate and smaller convergence error under random reshuffle than those under random sampling, and hence provide justifications to the superior practical performance of SGD with random reshuffle.} -} \ No newline at end of file diff --git a/outputs/outputs_20230421_000752/abstract.tex b/outputs/outputs_20230421_000752/abstract.tex deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/outputs/outputs_20230421_000752/backgrounds.tex b/outputs/outputs_20230421_000752/backgrounds.tex deleted file mode 100644 index 70d2ac9a527e9adfac4591b4daf2746003bc3faf..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/backgrounds.tex +++ /dev/null @@ -1,20 +0,0 @@ -\section{backgrounds} -\subsection{Problem Statement and Foundational Concepts} - -Reinforcement Learning (RL) is a subfield of machine learning that focuses on training agents to make decisions in an environment to maximize a cumulative reward signal. In RL, an agent interacts with an environment through a sequence of actions, observations, and rewards, aiming to learn an optimal policy that maps states to actions \cite{1512.09075}. The problem can be formalized as a Markov Decision Process (MDP), which is defined by a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{1511.02377}. The goal of RL is to find a policy $\pi(a|s)$ that maximizes the expected cumulative reward, defined as $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $R_{t+k+1}$ is the reward received at time step $t+k+1$ \cite{1512.07669}. - -\subsection{Q-Learning and Related Algorithms} - -Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected cumulative reward of taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by: - -\[Q(s, a) \leftarrow Q(s, a) + \alpha \left[ R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a) \right],\] - -where $\alpha$ is the learning rate, $R(s, a)$ is the reward for taking action $a$ in state $s$, and $s'$ is the next state \cite{2303.08631}. However, Q-learning can suffer from overestimation bias, which can lead to suboptimal performance \cite{2106.14642}. To address this issue, Double Q-learning was proposed, which uses two separate Q-value estimators and updates them alternately, mitigating overestimation bias while maintaining convergence guarantees \cite{2303.08631}. Another variant, Expert Q-learning, incorporates semi-supervised learning by splitting Q-values into state values and action advantages, and using an expert network to assess the value of states \cite{2106.14642}. - -\subsection{Policy Gradient Methods} - -Policy gradient methods are another class of RL algorithms that optimize the policy directly by estimating the gradient of the expected cumulative reward with respect to the policy parameters \cite{1703.02102}. The policy gradient theorem provides a simplified form for the gradient, which can be used to derive on-policy and off-policy algorithms \cite{1811.09013}. Natural policy gradients, which incorporate second-order information to improve convergence, form the foundation for state-of-the-art algorithms like Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. - -\subsection{Methodology and Evaluation Metrics} - -In this paper, we will explore various RL algorithms, focusing on Q-learning and its variants, as well as policy gradient methods. We will delve into their theoretical foundations, convergence properties, and practical limitations. To assess the performance of these algorithms, we will use evaluation metrics such as cumulative reward, convergence speed, and sample efficiency. By comparing the performance of different algorithms, we aim to provide insights into their strengths and weaknesses, and identify potential areas for improvement and future research directions. \ No newline at end of file diff --git a/outputs/outputs_20230421_000752/conclusion.tex b/outputs/outputs_20230421_000752/conclusion.tex deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/outputs/outputs_20230421_000752/experiments.tex b/outputs/outputs_20230421_000752/experiments.tex deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/outputs/outputs_20230421_000752/fancyhdr.sty b/outputs/outputs_20230421_000752/fancyhdr.sty deleted file mode 100644 index 77ed4e3012d822c7cca5c17efcae308b32b8cc2b..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/fancyhdr.sty +++ /dev/null @@ -1,485 +0,0 @@ -% fancyhdr.sty version 3.2 -% Fancy headers and footers for LaTeX. -% Piet van Oostrum, -% Dept of Computer and Information Sciences, University of Utrecht, -% Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands -% Telephone: +31 30 2532180. Email: piet@cs.uu.nl -% ======================================================================== -% LICENCE: -% This file may be distributed under the terms of the LaTeX Project Public -% License, as described in lppl.txt in the base LaTeX distribution. -% Either version 1 or, at your option, any later version. -% ======================================================================== -% MODIFICATION HISTORY: -% Sep 16, 1994 -% version 1.4: Correction for use with \reversemargin -% Sep 29, 1994: -% version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands -% Oct 4, 1994: -% version 1.6: Reset single spacing in headers/footers for use with -% setspace.sty or doublespace.sty -% Oct 4, 1994: -% version 1.7: changed \let\@mkboth\markboth to -% \def\@mkboth{\protect\markboth} to make it more robust -% Dec 5, 1994: -% version 1.8: corrections for amsbook/amsart: define \@chapapp and (more -% importantly) use the \chapter/sectionmark definitions from ps@headings if -% they exist (which should be true for all standard classes). -% May 31, 1995: -% version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage... -% construction in the doc did not work properly with the fancyplain style. -% June 1, 1995: -% version 1.91: The definition of \@mkboth wasn't restored on subsequent -% \pagestyle{fancy}'s. -% June 1, 1995: -% version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain} -% \pagestyle{fancy} would erroneously select the plain version. -% June 1, 1995: -% version 1.93: \fancypagestyle command added. -% Dec 11, 1995: -% version 1.94: suggested by Conrad Hughes -% CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule -% position (old hardcoded value of .3\normalbaselineskip is far too high -% when used with very small footer fonts). -% Jan 31, 1996: -% version 1.95: call \@normalsize in the reset code if that is defined, -% otherwise \normalsize. -% this is to solve a problem with ucthesis.cls, as this doesn't -% define \@currsize. Unfortunately for latex209 calling \normalsize doesn't -% work as this is optimized to do very little, so there \@normalsize should -% be called. Hopefully this code works for all versions of LaTeX known to -% mankind. -% April 25, 1996: -% version 1.96: initialize \headwidth to a magic (negative) value to catch -% most common cases that people change it before calling \pagestyle{fancy}. -% Note it can't be initialized when reading in this file, because -% \textwidth could be changed afterwards. This is quite probable. -% We also switch to \MakeUppercase rather than \uppercase and introduce a -% \nouppercase command for use in headers. and footers. -% May 3, 1996: -% version 1.97: Two changes: -% 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults -% for the chapter and section marks. The current version of amsbook and -% amsart classes don't seem to need them anymore. Moreover the standard -% latex classes don't use \markboth if twoside isn't selected, and this is -% confusing as \leftmark doesn't work as expected. -% 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem -% in the amsbook and amsart classes, that make global changes to \topskip, -% which are reset in \ps@empty. Hopefully this doesn't break other things. -% May 7, 1996: -% version 1.98: -% Added % after the line \def\nouppercase -% May 7, 1996: -% version 1.99: This is the alpha version of fancyhdr 2.0 -% Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf. -% Changed \headrulewidth, \footrulewidth, \footruleskip to -% macros rather than length parameters, In this way they can be -% conditionalized and they don't consume length registers. There is no need -% to have them as length registers unless you want to do calculations with -% them, which is unlikely. Note that this may make some uses of them -% incompatible (i.e. if you have a file that uses \setlength or \xxxx=) -% May 10, 1996: -% version 1.99a: -% Added a few more % signs -% May 10, 1996: -% version 1.99b: -% Changed the syntax of \f@nfor to be resistent to catcode changes of := -% Removed the [1] from the defs of \lhead etc. because the parameter is -% consumed by the \@[xy]lhead etc. macros. -% June 24, 1997: -% version 1.99c: -% corrected \nouppercase to also include the protected form of \MakeUppercase -% \global added to manipulation of \headwidth. -% \iffootnote command added. -% Some comments added about \@fancyhead and \@fancyfoot. -% Aug 24, 1998 -% version 1.99d -% Changed the default \ps@empty to \ps@@empty in order to allow -% \fancypagestyle{empty} redefinition. -% Oct 11, 2000 -% version 2.0 -% Added LPPL license clause. -% -% A check for \headheight is added. An errormessage is given (once) if the -% header is too large. Empty headers don't generate the error even if -% \headheight is very small or even 0pt. -% Warning added for the use of 'E' option when twoside option is not used. -% In this case the 'E' fields will never be used. -% -% Mar 10, 2002 -% version 2.1beta -% New command: \fancyhfoffset[place]{length} -% defines offsets to be applied to the header/footer to let it stick into -% the margins (if length > 0). -% place is like in fancyhead, except that only E,O,L,R can be used. -% This replaces the old calculation based on \headwidth and the marginpar -% area. -% \headwidth will be dynamically calculated in the headers/footers when -% this is used. -% -% Mar 26, 2002 -% version 2.1beta2 -% \fancyhfoffset now also takes h,f as possible letters in the argument to -% allow the header and footer widths to be different. -% New commands \fancyheadoffset and \fancyfootoffset added comparable to -% \fancyhead and \fancyfoot. -% Errormessages and warnings have been made more informative. -% -% Dec 9, 2002 -% version 2.1 -% The defaults for \footrulewidth, \plainheadrulewidth and -% \plainfootrulewidth are changed from \z@skip to 0pt. In this way when -% someone inadvertantly uses \setlength to change any of these, the value -% of \z@skip will not be changed, rather an errormessage will be given. - -% March 3, 2004 -% Release of version 3.0 - -% Oct 7, 2004 -% version 3.1 -% Added '\endlinechar=13' to \fancy@reset to prevent problems with -% includegraphics in header when verbatiminput is active. - -% March 22, 2005 -% version 3.2 -% reset \everypar (the real one) in \fancy@reset because spanish.ldf does -% strange things with \everypar between << and >>. - -\def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty} - -\def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else - \fancy@gbl\def#1{#2\strut}\fi} - -\let\fancy@gbl\global - -\def\@fancyerrmsg#1{% - \ifx\PackageError\undefined - \errmessage{#1}\else - \PackageError{Fancyhdr}{#1}{}\fi} -\def\@fancywarning#1{% - \ifx\PackageWarning\undefined - \errmessage{#1}\else - \PackageWarning{Fancyhdr}{#1}{}\fi} - -% Usage: \@forc \var{charstring}{command to be executed for each char} -% This is similar to LaTeX's \@tfor, but expands the charstring. - -\def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}} -\def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else - \f@@rc#1#2\f@@rc{#3}\fi} -\def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}} - -% Usage: \f@nfor\name:=list\do{body} -% Like LaTeX's \@for but an empty list is treated as a list with an empty -% element - -\newcommand{\f@nfor}[3]{\edef\@fortmp{#2}% - \expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}} - -% Usage: \def@ult \cs{defaults}{argument} -% sets \cs to the characters from defaults appearing in argument -% or defaults if it would be empty. All characters are lowercased. - -\newcommand\def@ult[3]{% - \edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a - \def#1{}% - \@forc\tmpf@ra{#2}% - {\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}% - \ifx\@empty#1\def#1{#2}\fi} -% -% \if@in -% -\newcommand{\if@in}[4]{% - \edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}% - \expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi} - -\newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}% - {\f@ncyhf\fancyhead h[]}} -\newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}% - {\f@ncyhf\fancyfoot f[]}} -\newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}% - {\f@ncyhf\fancyhf{}[]}} - -% New commands for offsets added - -\newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}% - {\f@ncyhfoffs\fancyheadoffset h[]}} -\newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}% - {\f@ncyhfoffs\fancyfootoffset f[]}} -\newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}% - {\f@ncyhfoffs\fancyhfoffset{}[]}} - -% The header and footer fields are stored in command sequences with -% names of the form: \f@ncy with for [eo], from [lcr] -% and from [hf]. - -\def\f@ncyhf#1#2[#3]#4{% - \def\temp@c{}% - \@forc\tmpf@ra{#3}% - {\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}% - {}{\edef\temp@c{\temp@c\tmpf@ra}}}% - \ifx\@empty\temp@c\else - \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument: - [#3]}% - \fi - \f@nfor\temp@c{#3}% - {\def@ult\f@@@eo{eo}\temp@c - \if@twoside\else - \if\f@@@eo e\@fancywarning - {\string#1's `E' option without twoside option is useless}\fi\fi - \def@ult\f@@@lcr{lcr}\temp@c - \def@ult\f@@@hf{hf}{#2\temp@c}% - \@forc\f@@eo\f@@@eo - {\@forc\f@@lcr\f@@@lcr - {\@forc\f@@hf\f@@@hf - {\expandafter\fancy@def\csname - f@ncy\f@@eo\f@@lcr\f@@hf\endcsname - {#4}}}}}} - -\def\f@ncyhfoffs#1#2[#3]#4{% - \def\temp@c{}% - \@forc\tmpf@ra{#3}% - {\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}% - {}{\edef\temp@c{\temp@c\tmpf@ra}}}% - \ifx\@empty\temp@c\else - \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument: - [#3]}% - \fi - \f@nfor\temp@c{#3}% - {\def@ult\f@@@eo{eo}\temp@c - \if@twoside\else - \if\f@@@eo e\@fancywarning - {\string#1's `E' option without twoside option is useless}\fi\fi - \def@ult\f@@@lcr{lr}\temp@c - \def@ult\f@@@hf{hf}{#2\temp@c}% - \@forc\f@@eo\f@@@eo - {\@forc\f@@lcr\f@@@lcr - {\@forc\f@@hf\f@@@hf - {\expandafter\setlength\csname - f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname - {#4}}}}}% - \fancy@setoffs} - -% Fancyheadings version 1 commands. These are more or less deprecated, -% but they continue to work. - -\newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}} -\def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}} -\def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}} - -\newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}} -\def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}} -\def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}} - -\newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}} -\def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}} -\def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}} - -\newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}} -\def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}} -\def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}} - -\newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}} -\def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}} -\def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}} - -\newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}} -\def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}} -\def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}} - -\newlength{\fancy@headwidth} -\let\headwidth\fancy@headwidth -\newlength{\f@ncyO@elh} -\newlength{\f@ncyO@erh} -\newlength{\f@ncyO@olh} -\newlength{\f@ncyO@orh} -\newlength{\f@ncyO@elf} -\newlength{\f@ncyO@erf} -\newlength{\f@ncyO@olf} -\newlength{\f@ncyO@orf} -\newcommand{\headrulewidth}{0.4pt} -\newcommand{\footrulewidth}{0pt} -\newcommand{\footruleskip}{.3\normalbaselineskip} - -% Fancyplain stuff shouldn't be used anymore (rather -% \fancypagestyle{plain} should be used), but it must be present for -% compatibility reasons. - -\newcommand{\plainheadrulewidth}{0pt} -\newcommand{\plainfootrulewidth}{0pt} -\newif\if@fancyplain \@fancyplainfalse -\def\fancyplain#1#2{\if@fancyplain#1\else#2\fi} - -\headwidth=-123456789sp %magic constant - -% Command to reset various things in the headers: -% a.o. single spacing (taken from setspace.sty) -% and the catcode of ^^M (so that epsf files in the header work if a -% verbatim crosses a page boundary) -% It also defines a \nouppercase command that disables \uppercase and -% \Makeuppercase. It can only be used in the headers and footers. -\let\fnch@everypar\everypar% save real \everypar because of spanish.ldf -\def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13 - \def\baselinestretch{1}% - \def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax - \expandafter\let\csname MakeUppercase \endcsname\relax##1}}% - \ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e - \ifx\@normalsize\undefined \normalsize % for ucthesis.cls - \else \@normalsize \fi - \else% NFSS (2.09) present - \@newbaseline% - \fi} - -% Initialization of the head and foot text. - -% The default values still contain \fancyplain for compatibility. -\fancyhf{} % clear all -% lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages -% evenhead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages -\if@twoside - \fancyhead[el,or]{\fancyplain{}{\sl\rightmark}} - \fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}} -\else - \fancyhead[l]{\fancyplain{}{\sl\rightmark}} - \fancyhead[r]{\fancyplain{}{\sl\leftmark}} -\fi -\fancyfoot[c]{\rm\thepage} % page number - -% Use box 0 as a temp box and dimen 0 as temp dimen. -% This can be done, because this code will always -% be used inside another box, and therefore the changes are local. - -\def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning - {\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J - We now make it that large for the rest of the document.^^J - This may cause the page layout to be inconsistent, however\@gobble}% - \dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi - \box0} - -% Put together a header or footer given the left, center and -% right text, fillers at left and right and a rule. -% The \lap commands put the text into an hbox of zero size, -% so overlapping text does not generate an errormessage. -% These macros have 5 parameters: -% 1. LEFTSIDE BEARING % This determines at which side the header will stick -% out. When \fancyhfoffset is used this calculates \headwidth, otherwise -% it is \hss or \relax (after expansion). -% 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component. -% 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp. -% 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component. -% 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion). - -\def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset - \@fancyvbox\headheight{\hbox - {\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill - \parbox[b]{\headwidth}{\centering#3}\hfill - \llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5} - -\def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset - \@fancyvbox\footskip{\footrule - \hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill - \parbox[t]{\headwidth}{\centering#3}\hfill - \llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5} - -\def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi - \hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}} - -\def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi - \vskip-\footruleskip\vskip-\footrulewidth - \hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}} - -\def\ps@fancy{% -\@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook -% -% Define \MakeUppercase for old LaTeXen. -% Note: we used \def rather than \let, so that \let\uppercase\relax (from -% the version 1 documentation) will still work. -% -\@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}% -\@ifundefined{chapter}{\def\sectionmark##1{\markboth -{\MakeUppercase{\ifnum \c@secnumdepth>\z@ - \thesection\hskip 1em\relax \fi ##1}}{}}% -\def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne - \thesubsection\hskip 1em\relax \fi ##1}}}% -{\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne - \@chapapp\ \thechapter. \ \fi ##1}}{}}% -\def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@ - \thesection. \ \fi ##1}}}}% -%\csname ps@headings\endcsname % use \ps@headings defaults if they exist -\ps@@fancy -\gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}% -% Initialize \headwidth if the user didn't -% -\ifdim\headwidth<0sp -% -% This catches the case that \headwidth hasn't been initialized and the -% case that the user added something to \headwidth in the expectation that -% it was initialized to \textwidth. We compensate this now. This loses if -% the user intended to multiply it by a factor. But that case is more -% likely done by saying something like \headwidth=1.2\textwidth. -% The doc says you have to change \headwidth after the first call to -% \pagestyle{fancy}. This code is just to catch the most common cases were -% that requirement is violated. -% - \global\advance\headwidth123456789sp\global\advance\headwidth\textwidth -\fi} -\def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy} -\def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy} -\let\ps@@empty\ps@empty -\def\ps@@fancy{% -\ps@@empty % This is for amsbook/amsart, which do strange things with \topskip -\def\@mkboth{\protect\markboth}% -\def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}% -\def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}% -\def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}% -\def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}% -} -% Default definitions for compatibility mode: -% These cause the header/footer to take the defined \headwidth as width -% And to shift in the direction of the marginpar area - -\def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi} -\def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi} -\let\fancy@Oelh\fancy@Oorh -\let\fancy@Oerh\fancy@Oolh - -\let\fancy@Oolf\fancy@Oolh -\let\fancy@Oorf\fancy@Oorh -\let\fancy@Oelf\fancy@Oelh -\let\fancy@Oerf\fancy@Oerh - -% New definitions for the use of \fancyhfoffset -% These calculate the \headwidth from \textwidth and the specified offsets. - -\def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh - \advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh} -\def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh - \advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh} - -\def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf - \advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf} -\def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf - \advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf} - -\def\fancy@setoffs{% -% Just in case \let\headwidth\textwidth was used - \fancy@gbl\let\headwidth\fancy@headwidth - \fancy@gbl\let\fancy@Oolh\fancy@offsolh - \fancy@gbl\let\fancy@Oelh\fancy@offselh - \fancy@gbl\let\fancy@Oorh\hss - \fancy@gbl\let\fancy@Oerh\hss - \fancy@gbl\let\fancy@Oolf\fancy@offsolf - \fancy@gbl\let\fancy@Oelf\fancy@offself - \fancy@gbl\let\fancy@Oorf\hss - \fancy@gbl\let\fancy@Oerf\hss} - -\newif\iffootnote -\let\latex@makecol\@makecol -\def\@makecol{\ifvoid\footins\footnotetrue\else\footnotefalse\fi -\let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol} -\def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi} -\def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi} -\def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi} - -\newcommand{\fancypagestyle}[2]{% - \@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}} diff --git a/outputs/outputs_20230421_000752/generation.log b/outputs/outputs_20230421_000752/generation.log deleted file mode 100644 index 4ac9703d95c92c9ea34d60b00e9b15a033923a8b..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/generation.log +++ /dev/null @@ -1,123 +0,0 @@ -INFO:utils.gpt_interaction:{"Markov Decision Process": 5, "Q-Learning": 4, "Policy Gradient": 4, "Deep Reinforcement Learning": 5, "Temporal Difference": 3} -INFO:root:For generating keywords, 119 tokens have been used (79 for prompts; 40 for completion). 119 tokens have been used in total. -INFO:utils.prompts:Generated prompts for introduction: I am writing a machine learning survey about 'Reinforcement Learning'. -You need to write the introduction section. Please include five paragraph: Establishing the motivation for the research. Explaining its importance and relevance to the AI community. Clearly state the problem you're addressing, your proposed solution, and the specific research questions or objectives. Briefly mention key related work for context. Explain the main differences from your work. -Please read the following references: -{'1512.07669': ' This article presents a short and concise description of stochastic\napproximation algorithms in reinforcement learning of Markov decision\nprocesses. The algorithms can also be used as a suboptimal method for partially\nobserved Markov decision processes.\n', '1511.02377': ' We provide a full characterization of the set of value functions of Markov\ndecision processes.\n', '1512.09075': ' This paper specifies a notation for Markov decision processes.\n', '2008.10426': ' Decisiveness has proven to be an elegant concept for denumerable Markov\nchains: it is general enough to encompass several natural classes of\ndenumerable Markov chains, and is a sufficient condition for simple qualitative\nand approximate quantitative model checking algorithms to exist. In this paper,\nwe explore how to extend the notion of decisiveness to Markov decision\nprocesses. Compared to Markov chains, the extra non-determinism can be resolved\nin an adversarial or cooperative way, yielding two natural notions of\ndecisiveness. We then explore whether these notions yield model checking\nprocedures concerning the infimum and supremum probabilities of reachability\nproperties.\n', '0711.2185': ' For a countable-state Markov decision process we introduce an embedding which\nproduces a finite-state Markov decision process. The finite-state embedded\nprocess has the same optimal cost, and moreover, it has the same dynamics as\nthe original process when restricting to the approximating set. The embedded\nprocess can be used as an approximation which, being finite, is more convenient\nfor computation and implementation.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1708.05866': ' Deep reinforcement learning is poised to revolutionise the field of AI and\nrepresents a step towards building autonomous systems with a higher level\nunderstanding of the visual world. Currently, deep learning is enabling\nreinforcement learning to scale to problems that were previously intractable,\nsuch as learning to play video games directly from pixels. Deep reinforcement\nlearning algorithms are also applied to robotics, allowing control policies for\nrobots to be learned directly from camera inputs in the real world. In this\nsurvey, we begin with an introduction to the general field of reinforcement\nlearning, then progress to the main streams of value-based and policy-based\nmethods. Our survey will cover central algorithms in deep reinforcement\nlearning, including the deep $Q$-network, trust region policy optimisation, and\nasynchronous advantage actor-critic. In parallel, we highlight the unique\nadvantages of deep neural networks, focusing on visual understanding via\nreinforcement learning. To conclude, we describe several current areas of\nresearch within the field.\n', '1906.10025': ' Recent advances in Reinforcement Learning, grounded on combining classical\ntheoretical results with Deep Learning paradigm, led to breakthroughs in many\nartificial intelligence tasks and gave birth to Deep Reinforcement Learning\n(DRL) as a field of research. In this work latest DRL algorithms are reviewed\nwith a focus on their theoretical justification, practical limitations and\nobserved empirical properties.\n', '2111.01334': ' Quantifying the structural and functional differences of temporal networks is\na fundamental and challenging problem in the era of big data. This work\nproposes a temporal dissimilarity measure for temporal network comparison based\non the fastest arrival distance distribution and spectral entropy based\nJensen-Shannon divergence. Experimental results on both synthetic and empirical\ntemporal networks show that the proposed measure could discriminate diverse\ntemporal networks with different structures by capturing various topological\nand temporal properties. Moreover, the proposed measure can discern the\nfunctional distinctions and is found effective applications in temporal network\nclassification and spreadability discrimination.\n', '2110.06553': ' Electroencephalography (EEG) is a popular and effective tool for emotion\nrecognition. However, the propagation mechanisms of EEG in the human brain and\nits intrinsic correlation with emotions are still obscure to researchers. This\nwork proposes four variant transformer frameworks~(spatial attention, temporal\nattention, sequential spatial-temporal attention and simultaneous\nspatial-temporal attention) for EEG emotion recognition to explore the\nrelationship between emotion and spatial-temporal EEG features. Specifically,\nspatial attention and temporal attention are to learn the topological structure\ninformation and time-varying EEG characteristics for emotion recognition\nrespectively. Sequential spatial-temporal attention does the spatial attention\nwithin a one-second segment and temporal attention within one sample\nsequentially to explore the influence degree of emotional stimulation on EEG\nsignals of diverse EEG electrodes in the same temporal segment. The\nsimultaneous spatial-temporal attention, whose spatial and temporal attention\nare performed simultaneously, is used to model the relationship between\ndifferent spatial features in different time segments. The experimental results\ndemonstrate that simultaneous spatial-temporal attention leads to the best\nemotion recognition accuracy among the design choices, indicating modeling the\ncorrelation of spatial and temporal features of EEG signals is significant to\nemotion recognition.\n', '2007.04828': ' Links in most real networks often change over time. Such temporality of links\nencodes the ordering and causality of interactions between nodes and has a\nprofound effect on network dynamics and function. Empirical evidences have\nshown that the temporal nature of links in many real-world networks is not\nrandom. Nonetheless, it is challenging to predict temporal link patterns while\nconsidering the entanglement between topological and temporal link patterns.\nHere we propose an entropy-rate based framework, based on combined\ntopological-temporal regularities, for quantifying the predictability of any\ntemporal network. We apply our framework on various model networks,\ndemonstrating that it indeed captures the intrinsic topological-temporal\nregularities whereas previous methods considered only temporal aspects. We also\napply our framework on 18 real networks of different types and determine their\npredictability. Interestingly, we find that for most real temporal networks,\ndespite the greater complexity of predictability brought by the increase in\ndimension the combined topological-temporal predictability is higher than the\ntemporal predictability. Our results demonstrate the necessity of incorporating\nboth temporal and topological aspects of networks in order to improve\npredictions of dynamical processes.\n'} -Every time you use information from the references, you need to cite its id after the sentence; for example, the sentence where you use information from 1905.09788 \cite{1905.09788}. Please avoid citing the same reference in the same paragraph. -Put your response (do not include \section{...}) in the following Python script:with open("introduction.tex", "w") as f: f.write(r'''your_response''') -INFO:utils.gpt_interaction:Reinforcement Learning (RL) has emerged as a significant research area in the field of artificial intelligence, with a wide range of applications in robotics, finance, healthcare, and gaming \cite{2108.11510}. The primary goal of RL is to develop algorithms that allow agents to learn optimal policies through interaction with their environment, maximizing the cumulative reward over time \cite{1708.05866}. Despite the considerable progress made in recent years, RL still faces several challenges, such as the trade-off between exploration and exploitation, the curse of dimensionality, and the need for efficient algorithms that can handle large-scale and complex problems \cite{1906.10025}. - -One of the major breakthroughs in RL has been the development of Q-learning algorithms, which have been proven to converge to the optimal solution \cite{2303.08631}. However, Q-learning is known to suffer from overestimation bias, leading to suboptimal performance and slow convergence in some cases \cite{2106.14642}. To address this issue, researchers have proposed various modifications and extensions to Q-learning, such as Double Q-learning \cite{1511.02377} and Self-correcting Q-learning \cite{2012.01100}, which aim to mitigate the overestimation bias while maintaining convergence guarantees. - -Another essential aspect of RL research is the incorporation of deep learning techniques, giving rise to the field of Deep Reinforcement Learning (DRL) \cite{1709.05067}. DRL has demonstrated remarkable success in various domains, such as playing video games directly from pixels and learning control policies for robots \cite{1708.05866}. However, DRL algorithms often require a large amount of data and computational resources, which limits their applicability in real-world scenarios \cite{1906.10025}. To overcome these limitations, researchers have proposed various approaches, including distributed DRL \cite{2212.00253} and expert-guided DRL \cite{2106.14642}, which aim to improve the sample efficiency and scalability of DRL algorithms. - -Related work in the field of RL has also focused on the development of policy gradient methods, which optimize the policy directly by following the gradient of the expected return \cite{1811.09013}. These methods have been particularly successful in continuous action settings and have led to the development of algorithms such as Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. However, policy gradient methods often require on-policy data, which can be inefficient in terms of sample complexity \cite{1911.04817}. - -In summary, this survey aims to provide a comprehensive overview of the current state of Reinforcement Learning, focusing on the challenges and recent advances in Q-learning, Deep Reinforcement Learning, and policy gradient methods. By examining the key algorithms, techniques, and applications in these areas, we hope to shed light on the current limitations and future research directions in the field of RL. -INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message. - -INFO:utils.gpt_interaction:assistant_message: Reinforcement Learning (RL) has emerged as a significant research area in the field of artificial intelligence, with a wide range of applications in robotics, finance, healthcare, and gaming \cite{2108.11510}. The primary goal of RL is to develop algorithms that allow agents to learn optimal policies through interaction with their environment, maximizing the cumulative reward over time \cite{1708.05866}. Despite the considerable progress made in recent years, RL still faces several challenges, such as the trade-off between exploration and exploitation, the curse of dimensionality, and the need for efficient algorithms that can handle large-scale and complex problems \cite{1906.10025}. - -One of the major breakthroughs in RL has been the development of Q-learning algorithms, which have been proven to converge to the optimal solution \cite{2303.08631}. However, Q-learning is known to suffer from overestimation bias, leading to suboptimal performance and slow convergence in some cases \cite{2106.14642}. To address this issue, researchers have proposed various modifications and extensions to Q-learning, such as Double Q-learning \cite{1511.02377} and Self-correcting Q-learning \cite{2012.01100}, which aim to mitigate the overestimation bias while maintaining convergence guarantees. - -Another essential aspect of RL research is the incorporation of deep learning techniques, giving rise to the field of Deep Reinforcement Learning (DRL) \cite{1709.05067}. DRL has demonstrated remarkable success in various domains, such as playing video games directly from pixels and learning control policies for robots \cite{1708.05866}. However, DRL algorithms often require a large amount of data and computational resources, which limits their applicability in real-world scenarios \cite{1906.10025}. To overcome these limitations, researchers have proposed various approaches, including distributed DRL \cite{2212.00253} and expert-guided DRL \cite{2106.14642}, which aim to improve the sample efficiency and scalability of DRL algorithms. - -Related work in the field of RL has also focused on the development of policy gradient methods, which optimize the policy directly by following the gradient of the expected return \cite{1811.09013}. These methods have been particularly successful in continuous action settings and have led to the development of algorithms such as Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. However, policy gradient methods often require on-policy data, which can be inefficient in terms of sample complexity \cite{1911.04817}. - -In summary, this survey aims to provide a comprehensive overview of the current state of Reinforcement Learning, focusing on the challenges and recent advances in Q-learning, Deep Reinforcement Learning, and policy gradient methods. By examining the key algorithms, techniques, and applications in these areas, we hope to shed light on the current limitations and future research directions in the field of RL. -INFO:root:For generating introduction, 4397 tokens have been used (3781 for prompts; 616 for completion). 4516 tokens have been used in total. -INFO:utils.prompts:Generated prompts for related works: I am writing a machine learning survey about 'Reinforcement Learning'. -You need to write the related works section. Please discuss key publications, methods, and techniques in your research area. Analyze the strengths and weaknesses of existing methods, and present the related works in a logical manner, often chronologically. Consider using a taxonomy or categorization to structure the discussion. Do not use \section{...} or \subsection{...}; use \paragraph{...} instead. -Please read the following references: -{'1512.07669': ' This article presents a short and concise description of stochastic\napproximation algorithms in reinforcement learning of Markov decision\nprocesses. The algorithms can also be used as a suboptimal method for partially\nobserved Markov decision processes.\n', '1511.02377': ' We provide a full characterization of the set of value functions of Markov\ndecision processes.\n', '1512.09075': ' This paper specifies a notation for Markov decision processes.\n', '2008.10426': ' Decisiveness has proven to be an elegant concept for denumerable Markov\nchains: it is general enough to encompass several natural classes of\ndenumerable Markov chains, and is a sufficient condition for simple qualitative\nand approximate quantitative model checking algorithms to exist. In this paper,\nwe explore how to extend the notion of decisiveness to Markov decision\nprocesses. Compared to Markov chains, the extra non-determinism can be resolved\nin an adversarial or cooperative way, yielding two natural notions of\ndecisiveness. We then explore whether these notions yield model checking\nprocedures concerning the infimum and supremum probabilities of reachability\nproperties.\n', '0711.2185': ' For a countable-state Markov decision process we introduce an embedding which\nproduces a finite-state Markov decision process. The finite-state embedded\nprocess has the same optimal cost, and moreover, it has the same dynamics as\nthe original process when restricting to the approximating set. The embedded\nprocess can be used as an approximation which, being finite, is more convenient\nfor computation and implementation.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1708.05866': ' Deep reinforcement learning is poised to revolutionise the field of AI and\nrepresents a step towards building autonomous systems with a higher level\nunderstanding of the visual world. Currently, deep learning is enabling\nreinforcement learning to scale to problems that were previously intractable,\nsuch as learning to play video games directly from pixels. Deep reinforcement\nlearning algorithms are also applied to robotics, allowing control policies for\nrobots to be learned directly from camera inputs in the real world. In this\nsurvey, we begin with an introduction to the general field of reinforcement\nlearning, then progress to the main streams of value-based and policy-based\nmethods. Our survey will cover central algorithms in deep reinforcement\nlearning, including the deep $Q$-network, trust region policy optimisation, and\nasynchronous advantage actor-critic. In parallel, we highlight the unique\nadvantages of deep neural networks, focusing on visual understanding via\nreinforcement learning. To conclude, we describe several current areas of\nresearch within the field.\n', '1906.10025': ' Recent advances in Reinforcement Learning, grounded on combining classical\ntheoretical results with Deep Learning paradigm, led to breakthroughs in many\nartificial intelligence tasks and gave birth to Deep Reinforcement Learning\n(DRL) as a field of research. In this work latest DRL algorithms are reviewed\nwith a focus on their theoretical justification, practical limitations and\nobserved empirical properties.\n', '2111.01334': ' Quantifying the structural and functional differences of temporal networks is\na fundamental and challenging problem in the era of big data. This work\nproposes a temporal dissimilarity measure for temporal network comparison based\non the fastest arrival distance distribution and spectral entropy based\nJensen-Shannon divergence. Experimental results on both synthetic and empirical\ntemporal networks show that the proposed measure could discriminate diverse\ntemporal networks with different structures by capturing various topological\nand temporal properties. Moreover, the proposed measure can discern the\nfunctional distinctions and is found effective applications in temporal network\nclassification and spreadability discrimination.\n', '2110.06553': ' Electroencephalography (EEG) is a popular and effective tool for emotion\nrecognition. However, the propagation mechanisms of EEG in the human brain and\nits intrinsic correlation with emotions are still obscure to researchers. This\nwork proposes four variant transformer frameworks~(spatial attention, temporal\nattention, sequential spatial-temporal attention and simultaneous\nspatial-temporal attention) for EEG emotion recognition to explore the\nrelationship between emotion and spatial-temporal EEG features. Specifically,\nspatial attention and temporal attention are to learn the topological structure\ninformation and time-varying EEG characteristics for emotion recognition\nrespectively. Sequential spatial-temporal attention does the spatial attention\nwithin a one-second segment and temporal attention within one sample\nsequentially to explore the influence degree of emotional stimulation on EEG\nsignals of diverse EEG electrodes in the same temporal segment. The\nsimultaneous spatial-temporal attention, whose spatial and temporal attention\nare performed simultaneously, is used to model the relationship between\ndifferent spatial features in different time segments. The experimental results\ndemonstrate that simultaneous spatial-temporal attention leads to the best\nemotion recognition accuracy among the design choices, indicating modeling the\ncorrelation of spatial and temporal features of EEG signals is significant to\nemotion recognition.\n', '2007.04828': ' Links in most real networks often change over time. Such temporality of links\nencodes the ordering and causality of interactions between nodes and has a\nprofound effect on network dynamics and function. Empirical evidences have\nshown that the temporal nature of links in many real-world networks is not\nrandom. Nonetheless, it is challenging to predict temporal link patterns while\nconsidering the entanglement between topological and temporal link patterns.\nHere we propose an entropy-rate based framework, based on combined\ntopological-temporal regularities, for quantifying the predictability of any\ntemporal network. We apply our framework on various model networks,\ndemonstrating that it indeed captures the intrinsic topological-temporal\nregularities whereas previous methods considered only temporal aspects. We also\napply our framework on 18 real networks of different types and determine their\npredictability. Interestingly, we find that for most real temporal networks,\ndespite the greater complexity of predictability brought by the increase in\ndimension the combined topological-temporal predictability is higher than the\ntemporal predictability. Our results demonstrate the necessity of incorporating\nboth temporal and topological aspects of networks in order to improve\npredictions of dynamical processes.\n'} -Every time you use information from the references, you need to cite its id after the sentence; for example, the sentence where you use information from 1905.09788 \cite{1905.09788}. Please avoid citing the same reference in the same paragraph. -Put your response (do not include \section{...}) in the following Python script:with open("related works.tex", "w") as f: f.write(r'''your_response''') -INFO:utils.gpt_interaction:\paragraph{Markov Decision Processes:} -The study of reinforcement learning is fundamentally rooted in the understanding of Markov decision processes (MDPs). A concise description of stochastic approximation algorithms in reinforcement learning of MDPs is provided by \cite{1512.07669}. The work done in \cite{1511.02377} offers a full characterization of the set of value functions of MDPs, while \cite{1512.09075} specifies a notation for MDPs. The concept of decisiveness in denumerable Markov chains has been extended to MDPs in \cite{2008.10426}, exploring the implications of resolving non-determinism in adversarial or cooperative ways. Additionally, \cite{0711.2185} introduces an embedding technique to produce a finite-state MDP from a countable-state MDP, which can be used as an approximation for computational purposes. - -\paragraph{Q-Learning and Variants:} -Q-learning is a widely used reinforcement learning algorithm that converges to the optimal solution \cite{2303.08631}. However, it is known to overestimate values and spend too much time exploring unhelpful states. Double Q-learning, a convergent alternative, mitigates some of these overestimation issues but may lead to slower convergence \cite{2303.08631}. To address the maximization bias in Q-learning, \cite{2012.01100} introduces a self-correcting algorithm that balances the overestimation of conventional Q-learning and the underestimation of Double Q-learning. This self-correcting Q-learning algorithm is shown to be more accurate and achieves faster convergence in certain domains. - -\paragraph{Expert Q-Learning:} -Expert Q-learning is a novel deep reinforcement learning algorithm proposed in \cite{2106.14642}. Inspired by Dueling Q-learning, it incorporates semi-supervised learning into reinforcement learning by splitting Q-values into state values and action advantages. An expert network is designed in addition to the Q-network, which updates each time following the regular offline minibatch update. The algorithm is demonstrated to be more resistant to overestimation bias and achieves more robust performance compared to the baseline Q-learning algorithm. - -\paragraph{Policy Gradient Methods:} -Policy gradient methods are widely used for control in reinforcement learning, particularly in continuous action settings. Natural gradients have been extensively studied within the context of natural gradient actor-critic algorithms and deterministic policy gradients \cite{2209.01820}. The work in \cite{1811.09013} presents the first off-policy policy gradient theorem using emphatic weightings and develops a new actor-critic algorithm called Actor Critic with Emphatic weightings (ACE) that approximates the simplified gradients provided by the theorem. This algorithm is shown to outperform previous off-policy policy gradient methods, such as OffPAC and DPG, in finding the optimal solution. - -\paragraph{Deep Reinforcement Learning:} -Deep reinforcement learning (DRL) combines the power of deep learning with reinforcement learning, achieving remarkable success in various domains, such as finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. The field has seen significant advancements in recent years, with central algorithms such as the deep Q-network, trust region policy optimization, and asynchronous advantage actor-critic being developed \cite{1708.05866}. A detailed review of DRL algorithms and their theoretical justifications, practical limitations, and empirical properties can be found in \cite{1906.10025}. - -\paragraph{Temporal Networks:} -Temporal networks, where links change over time, are essential in understanding the ordering and causality of interactions between nodes in various applications. The work in \cite{2111.01334} proposes a temporal dissimilarity measure for temporal network comparison based on the fastest arrival distance distribution and spectral entropy-based Jensen-Shannon divergence. This measure is shown to effectively discriminate diverse temporal networks with different structures and functional distinctions. - -In conclusion, reinforcement learning has seen significant advancements in recent years, with various algorithms and techniques being developed to address the challenges in the field. From understanding the fundamentals of MDPs to developing advanced DRL algorithms, researchers continue to push the boundaries of what is possible in reinforcement learning and its applications. -INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message. - -INFO:utils.gpt_interaction:assistant_message: \paragraph{Markov Decision Processes:} -The study of reinforcement learning is fundamentally rooted in the understanding of Markov decision processes (MDPs). A concise description of stochastic approximation algorithms in reinforcement learning of MDPs is provided by \cite{1512.07669}. The work done in \cite{1511.02377} offers a full characterization of the set of value functions of MDPs, while \cite{1512.09075} specifies a notation for MDPs. The concept of decisiveness in denumerable Markov chains has been extended to MDPs in \cite{2008.10426}, exploring the implications of resolving non-determinism in adversarial or cooperative ways. Additionally, \cite{0711.2185} introduces an embedding technique to produce a finite-state MDP from a countable-state MDP, which can be used as an approximation for computational purposes. - -\paragraph{Q-Learning and Variants:} -Q-learning is a widely used reinforcement learning algorithm that converges to the optimal solution \cite{2303.08631}. However, it is known to overestimate values and spend too much time exploring unhelpful states. Double Q-learning, a convergent alternative, mitigates some of these overestimation issues but may lead to slower convergence \cite{2303.08631}. To address the maximization bias in Q-learning, \cite{2012.01100} introduces a self-correcting algorithm that balances the overestimation of conventional Q-learning and the underestimation of Double Q-learning. This self-correcting Q-learning algorithm is shown to be more accurate and achieves faster convergence in certain domains. - -\paragraph{Expert Q-Learning:} -Expert Q-learning is a novel deep reinforcement learning algorithm proposed in \cite{2106.14642}. Inspired by Dueling Q-learning, it incorporates semi-supervised learning into reinforcement learning by splitting Q-values into state values and action advantages. An expert network is designed in addition to the Q-network, which updates each time following the regular offline minibatch update. The algorithm is demonstrated to be more resistant to overestimation bias and achieves more robust performance compared to the baseline Q-learning algorithm. - -\paragraph{Policy Gradient Methods:} -Policy gradient methods are widely used for control in reinforcement learning, particularly in continuous action settings. Natural gradients have been extensively studied within the context of natural gradient actor-critic algorithms and deterministic policy gradients \cite{2209.01820}. The work in \cite{1811.09013} presents the first off-policy policy gradient theorem using emphatic weightings and develops a new actor-critic algorithm called Actor Critic with Emphatic weightings (ACE) that approximates the simplified gradients provided by the theorem. This algorithm is shown to outperform previous off-policy policy gradient methods, such as OffPAC and DPG, in finding the optimal solution. - -\paragraph{Deep Reinforcement Learning:} -Deep reinforcement learning (DRL) combines the power of deep learning with reinforcement learning, achieving remarkable success in various domains, such as finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. The field has seen significant advancements in recent years, with central algorithms such as the deep Q-network, trust region policy optimization, and asynchronous advantage actor-critic being developed \cite{1708.05866}. A detailed review of DRL algorithms and their theoretical justifications, practical limitations, and empirical properties can be found in \cite{1906.10025}. - -\paragraph{Temporal Networks:} -Temporal networks, where links change over time, are essential in understanding the ordering and causality of interactions between nodes in various applications. The work in \cite{2111.01334} proposes a temporal dissimilarity measure for temporal network comparison based on the fastest arrival distance distribution and spectral entropy-based Jensen-Shannon divergence. This measure is shown to effectively discriminate diverse temporal networks with different structures and functional distinctions. - -In conclusion, reinforcement learning has seen significant advancements in recent years, with various algorithms and techniques being developed to address the challenges in the field. From understanding the fundamentals of MDPs to developing advanced DRL algorithms, researchers continue to push the boundaries of what is possible in reinforcement learning and its applications. -INFO:root:For generating related works, 4667 tokens have been used (3793 for prompts; 874 for completion). 9183 tokens have been used in total. -INFO:utils.prompts:Generated prompts for backgrounds: I am writing a machine learning survey about 'Reinforcement Learning'. -You need to write the backgrounds section. Please clearly state the problem your research addresses. Explain the foundational theories, concepts, and principles that underpin your research using as many as mathematical formulas or equations (written in LaTeX). Introduce any necessary mathematical notations, equations, or algorithms that are central to your work (written them in LaTeX). Provide a high-level summary of the methodology in this paper to address the problem. Introduce the evaluation metrics this paper will use to assess the performance of models or algorithms. Do not include \section{...} but you can have \subsection{...}. -Please read the following references: -{'1512.07669': ' This article presents a short and concise description of stochastic\napproximation algorithms in reinforcement learning of Markov decision\nprocesses. The algorithms can also be used as a suboptimal method for partially\nobserved Markov decision processes.\n', '1511.02377': ' We provide a full characterization of the set of value functions of Markov\ndecision processes.\n', '1512.09075': ' This paper specifies a notation for Markov decision processes.\n', '2008.10426': ' Decisiveness has proven to be an elegant concept for denumerable Markov\nchains: it is general enough to encompass several natural classes of\ndenumerable Markov chains, and is a sufficient condition for simple qualitative\nand approximate quantitative model checking algorithms to exist. In this paper,\nwe explore how to extend the notion of decisiveness to Markov decision\nprocesses. Compared to Markov chains, the extra non-determinism can be resolved\nin an adversarial or cooperative way, yielding two natural notions of\ndecisiveness. We then explore whether these notions yield model checking\nprocedures concerning the infimum and supremum probabilities of reachability\nproperties.\n', '0711.2185': ' For a countable-state Markov decision process we introduce an embedding which\nproduces a finite-state Markov decision process. The finite-state embedded\nprocess has the same optimal cost, and moreover, it has the same dynamics as\nthe original process when restricting to the approximating set. The embedded\nprocess can be used as an approximation which, being finite, is more convenient\nfor computation and implementation.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1708.05866': ' Deep reinforcement learning is poised to revolutionise the field of AI and\nrepresents a step towards building autonomous systems with a higher level\nunderstanding of the visual world. Currently, deep learning is enabling\nreinforcement learning to scale to problems that were previously intractable,\nsuch as learning to play video games directly from pixels. Deep reinforcement\nlearning algorithms are also applied to robotics, allowing control policies for\nrobots to be learned directly from camera inputs in the real world. In this\nsurvey, we begin with an introduction to the general field of reinforcement\nlearning, then progress to the main streams of value-based and policy-based\nmethods. Our survey will cover central algorithms in deep reinforcement\nlearning, including the deep $Q$-network, trust region policy optimisation, and\nasynchronous advantage actor-critic. In parallel, we highlight the unique\nadvantages of deep neural networks, focusing on visual understanding via\nreinforcement learning. To conclude, we describe several current areas of\nresearch within the field.\n', '1906.10025': ' Recent advances in Reinforcement Learning, grounded on combining classical\ntheoretical results with Deep Learning paradigm, led to breakthroughs in many\nartificial intelligence tasks and gave birth to Deep Reinforcement Learning\n(DRL) as a field of research. In this work latest DRL algorithms are reviewed\nwith a focus on their theoretical justification, practical limitations and\nobserved empirical properties.\n', '2111.01334': ' Quantifying the structural and functional differences of temporal networks is\na fundamental and challenging problem in the era of big data. This work\nproposes a temporal dissimilarity measure for temporal network comparison based\non the fastest arrival distance distribution and spectral entropy based\nJensen-Shannon divergence. Experimental results on both synthetic and empirical\ntemporal networks show that the proposed measure could discriminate diverse\ntemporal networks with different structures by capturing various topological\nand temporal properties. Moreover, the proposed measure can discern the\nfunctional distinctions and is found effective applications in temporal network\nclassification and spreadability discrimination.\n', '2110.06553': ' Electroencephalography (EEG) is a popular and effective tool for emotion\nrecognition. However, the propagation mechanisms of EEG in the human brain and\nits intrinsic correlation with emotions are still obscure to researchers. This\nwork proposes four variant transformer frameworks~(spatial attention, temporal\nattention, sequential spatial-temporal attention and simultaneous\nspatial-temporal attention) for EEG emotion recognition to explore the\nrelationship between emotion and spatial-temporal EEG features. Specifically,\nspatial attention and temporal attention are to learn the topological structure\ninformation and time-varying EEG characteristics for emotion recognition\nrespectively. Sequential spatial-temporal attention does the spatial attention\nwithin a one-second segment and temporal attention within one sample\nsequentially to explore the influence degree of emotional stimulation on EEG\nsignals of diverse EEG electrodes in the same temporal segment. The\nsimultaneous spatial-temporal attention, whose spatial and temporal attention\nare performed simultaneously, is used to model the relationship between\ndifferent spatial features in different time segments. The experimental results\ndemonstrate that simultaneous spatial-temporal attention leads to the best\nemotion recognition accuracy among the design choices, indicating modeling the\ncorrelation of spatial and temporal features of EEG signals is significant to\nemotion recognition.\n', '2007.04828': ' Links in most real networks often change over time. Such temporality of links\nencodes the ordering and causality of interactions between nodes and has a\nprofound effect on network dynamics and function. Empirical evidences have\nshown that the temporal nature of links in many real-world networks is not\nrandom. Nonetheless, it is challenging to predict temporal link patterns while\nconsidering the entanglement between topological and temporal link patterns.\nHere we propose an entropy-rate based framework, based on combined\ntopological-temporal regularities, for quantifying the predictability of any\ntemporal network. We apply our framework on various model networks,\ndemonstrating that it indeed captures the intrinsic topological-temporal\nregularities whereas previous methods considered only temporal aspects. We also\napply our framework on 18 real networks of different types and determine their\npredictability. Interestingly, we find that for most real temporal networks,\ndespite the greater complexity of predictability brought by the increase in\ndimension the combined topological-temporal predictability is higher than the\ntemporal predictability. Our results demonstrate the necessity of incorporating\nboth temporal and topological aspects of networks in order to improve\npredictions of dynamical processes.\n'} -Every time you use information from the references, you need to cite its id after the sentence; for example, the sentence where you use information from 1905.09788 \cite{1905.09788}. Please avoid citing the same reference in the same paragraph. -Put your response (do not include \section{...}) in the following Python script:with open("backgrounds.tex", "w") as f: f.write(r'''your_response''') -INFO:utils.gpt_interaction:\subsection{Problem Statement and Foundational Concepts} - -Reinforcement Learning (RL) is a subfield of machine learning that focuses on training agents to make decisions in an environment to maximize a cumulative reward signal. In RL, an agent interacts with an environment through a sequence of actions, observations, and rewards, aiming to learn an optimal policy that maps states to actions \cite{1512.09075}. The problem can be formalized as a Markov Decision Process (MDP), which is defined by a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{1511.02377}. The goal of RL is to find a policy $\pi(a|s)$ that maximizes the expected cumulative reward, defined as $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $R_{t+k+1}$ is the reward received at time step $t+k+1$ \cite{1512.07669}. - -\subsection{Q-Learning and Related Algorithms} - -Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected cumulative reward of taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by: - -\[Q(s, a) \leftarrow Q(s, a) + \alpha \left[ R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a) \right],\] - -where $\alpha$ is the learning rate, $R(s, a)$ is the reward for taking action $a$ in state $s$, and $s'$ is the next state \cite{2303.08631}. However, Q-learning can suffer from overestimation bias, which can lead to suboptimal performance \cite{2106.14642}. To address this issue, Double Q-learning was proposed, which uses two separate Q-value estimators and updates them alternately, mitigating overestimation bias while maintaining convergence guarantees \cite{2303.08631}. Another variant, Expert Q-learning, incorporates semi-supervised learning by splitting Q-values into state values and action advantages, and using an expert network to assess the value of states \cite{2106.14642}. - -\subsection{Policy Gradient Methods} - -Policy gradient methods are another class of RL algorithms that optimize the policy directly by estimating the gradient of the expected cumulative reward with respect to the policy parameters \cite{1703.02102}. The policy gradient theorem provides a simplified form for the gradient, which can be used to derive on-policy and off-policy algorithms \cite{1811.09013}. Natural policy gradients, which incorporate second-order information to improve convergence, form the foundation for state-of-the-art algorithms like Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. - -\subsection{Methodology and Evaluation Metrics} - -In this paper, we will explore various RL algorithms, focusing on Q-learning and its variants, as well as policy gradient methods. We will delve into their theoretical foundations, convergence properties, and practical limitations. To assess the performance of these algorithms, we will use evaluation metrics such as cumulative reward, convergence speed, and sample efficiency. By comparing the performance of different algorithms, we aim to provide insights into their strengths and weaknesses, and identify potential areas for improvement and future research directions. -INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message. - -INFO:utils.gpt_interaction:assistant_message: \subsection{Problem Statement and Foundational Concepts} - -Reinforcement Learning (RL) is a subfield of machine learning that focuses on training agents to make decisions in an environment to maximize a cumulative reward signal. In RL, an agent interacts with an environment through a sequence of actions, observations, and rewards, aiming to learn an optimal policy that maps states to actions \cite{1512.09075}. The problem can be formalized as a Markov Decision Process (MDP), which is defined by a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{1511.02377}. The goal of RL is to find a policy $\pi(a|s)$ that maximizes the expected cumulative reward, defined as $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $R_{t+k+1}$ is the reward received at time step $t+k+1$ \cite{1512.07669}. - -\subsection{Q-Learning and Related Algorithms} - -Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected cumulative reward of taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by: - -\[Q(s, a) \leftarrow Q(s, a) + \alpha \left[ R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a) \right],\] - -where $\alpha$ is the learning rate, $R(s, a)$ is the reward for taking action $a$ in state $s$, and $s'$ is the next state \cite{2303.08631}. However, Q-learning can suffer from overestimation bias, which can lead to suboptimal performance \cite{2106.14642}. To address this issue, Double Q-learning was proposed, which uses two separate Q-value estimators and updates them alternately, mitigating overestimation bias while maintaining convergence guarantees \cite{2303.08631}. Another variant, Expert Q-learning, incorporates semi-supervised learning by splitting Q-values into state values and action advantages, and using an expert network to assess the value of states \cite{2106.14642}. - -\subsection{Policy Gradient Methods} - -Policy gradient methods are another class of RL algorithms that optimize the policy directly by estimating the gradient of the expected cumulative reward with respect to the policy parameters \cite{1703.02102}. The policy gradient theorem provides a simplified form for the gradient, which can be used to derive on-policy and off-policy algorithms \cite{1811.09013}. Natural policy gradients, which incorporate second-order information to improve convergence, form the foundation for state-of-the-art algorithms like Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. - -\subsection{Methodology and Evaluation Metrics} - -In this paper, we will explore various RL algorithms, focusing on Q-learning and its variants, as well as policy gradient methods. We will delve into their theoretical foundations, convergence properties, and practical limitations. To assess the performance of these algorithms, we will use evaluation metrics such as cumulative reward, convergence speed, and sample efficiency. By comparing the performance of different algorithms, we aim to provide insights into their strengths and weaknesses, and identify potential areas for improvement and future research directions. -INFO:root:For generating backgrounds, 4606 tokens have been used (3831 for prompts; 775 for completion). 13789 tokens have been used in total. diff --git a/outputs/outputs_20230421_000752/iclr2022_conference.bst b/outputs/outputs_20230421_000752/iclr2022_conference.bst deleted file mode 100644 index 149a48c5be151e84bc9f0f4ecb1381875e71573e..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/iclr2022_conference.bst +++ /dev/null @@ -1,1440 +0,0 @@ -%% File: `iclr2017.bst' -%% A copy of iclm2010.bst, which is a modification of `plainnl.bst' for use with natbib package -%% -%% Copyright 2010 Hal Daum\'e III -%% Modified by J. Frnkranz -%% - Changed labels from (X and Y, 2000) to (X & Y, 2000) -%% -%% Copyright 1993-2007 Patrick W Daly -%% Max-Planck-Institut f\"ur Sonnensystemforschung -%% Max-Planck-Str. 2 -%% D-37191 Katlenburg-Lindau -%% Germany -%% E-mail: daly@mps.mpg.de -%% -%% This program can be redistributed and/or modified under the terms -%% of the LaTeX Project Public License Distributed from CTAN -%% archives in directory macros/latex/base/lppl.txt; either -%% version 1 of the License, or any later version. -%% - % Version and source file information: - % \ProvidesFile{icml2010.mbs}[2007/11/26 1.93 (PWD)] - % - % BibTeX `plainnat' family - % version 0.99b for BibTeX versions 0.99a or later, - % for LaTeX versions 2.09 and 2e. - % - % For use with the `natbib.sty' package; emulates the corresponding - % member of the `plain' family, but with author-year citations. - % - % With version 6.0 of `natbib.sty', it may also be used for numerical - % citations, while retaining the commands \citeauthor, \citefullauthor, - % and \citeyear to print the corresponding information. - % - % For version 7.0 of `natbib.sty', the KEY field replaces missing - % authors/editors, and the date is left blank in \bibitem. - % - % Includes field EID for the sequence/citation number of electronic journals - % which is used instead of page numbers. - % - % Includes fields ISBN and ISSN. - % - % Includes field URL for Internet addresses. - % - % Includes field DOI for Digital Object Idenfifiers. - % - % Works best with the url.sty package of Donald Arseneau. - % - % Works with identical authors and year are further sorted by - % citation key, to preserve any natural sequence. - % -ENTRY - { address - author - booktitle - chapter - doi - eid - edition - editor - howpublished - institution - isbn - issn - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - url - volume - year - } - {} - { label extra.label sort.label short.list } - -INTEGERS { output.state before.all mid.sentence after.sentence after.block } - -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} - -STRINGS { s t } - -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} - -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} - -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} - -FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} - -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} - -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} - -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} - -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {field.or.null} -{ duplicate$ empty$ - { pop$ "" } - 'skip$ - if$ -} - -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "\emph{" swap$ * "}" * } - if$ -} - -INTEGERS { nameptr namesleft numnames } - -FUNCTION {format.names} -{ 's := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't := - nameptr #1 > - { namesleft #1 > - { ", " * t * } - { numnames #2 > - { "," * } - 'skip$ - if$ - t "others" = - { " et~al." * } - { " and " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {format.key} -{ empty$ - { key field.or.null } - { "" } - if$ -} - -FUNCTION {format.authors} -{ author empty$ - { "" } - { author format.names } - if$ -} - -FUNCTION {format.editors} -{ editor empty$ - { "" } - { editor format.names - editor num.names$ #1 > - { " (eds.)" * } - { " (ed.)" * } - if$ - } - if$ -} - -FUNCTION {format.isbn} -{ isbn empty$ - { "" } - { new.block "ISBN " isbn * } - if$ -} - -FUNCTION {format.issn} -{ issn empty$ - { "" } - { new.block "ISSN " issn * } - if$ -} - -FUNCTION {format.url} -{ url empty$ - { "" } - { new.block "URL \url{" url * "}" * } - if$ -} - -FUNCTION {format.doi} -{ doi empty$ - { "" } - { new.block "\doi{" doi * "}" * } - if$ -} - -FUNCTION {format.title} -{ title empty$ - { "" } - { title "t" change.case$ } - if$ -} - -FUNCTION {format.full.names} -{'s := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr - "{vv~}{ll}" format.name$ 't := - nameptr #1 > - { - namesleft #1 > - { ", " * t * } - { - numnames #2 > - { "," * } - 'skip$ - if$ - t "others" = - { " et~al." * } - { " and " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {author.editor.full} -{ author empty$ - { editor empty$ - { "" } - { editor format.full.names } - if$ - } - { author format.full.names } - if$ -} - -FUNCTION {author.full} -{ author empty$ - { "" } - { author format.full.names } - if$ -} - -FUNCTION {editor.full} -{ editor empty$ - { "" } - { editor format.full.names } - if$ -} - -FUNCTION {make.full.names} -{ type$ "book" = - type$ "inbook" = - or - 'author.editor.full - { type$ "proceedings" = - 'editor.full - 'author.full - if$ - } - if$ -} - -FUNCTION {output.bibitem} -{ newline$ - "\bibitem[" write$ - label write$ - ")" make.full.names duplicate$ short.list = - { pop$ } - { * } - if$ - "]{" * write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {n.dashify} -{ 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {format.date} -{ year duplicate$ empty$ - { "empty year in " cite$ * warning$ - pop$ "" } - 'skip$ - if$ - month empty$ - 'skip$ - { month - " " * swap$ * - } - if$ - extra.label * -} - -FUNCTION {format.btitle} -{ title emphasize -} - -FUNCTION {tie.or.space.connect} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ * * -} - -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} - -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { "volume" volume tie.or.space.connect - series empty$ - 'skip$ - { " of " * series emphasize * } - if$ - "volume and number" number either.or.check - } - if$ -} - -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { output.state mid.sentence = - { "number" } - { "Number" } - if$ - number tie.or.space.connect - series empty$ - { "there's a number but no series in " cite$ * warning$ } - { " in " * series * } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition empty$ - { "" } - { output.state mid.sentence = - { edition "l" change.case$ " edition" * } - { edition "t" change.case$ " edition" * } - if$ - } - if$ -} - -INTEGERS { multiresult } - -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} - -FUNCTION {format.pages} -{ pages empty$ - { "" } - { pages multi.page.check - { "pp.\ " pages n.dashify tie.or.space.connect } - { "pp.\ " pages tie.or.space.connect } - if$ - } - if$ -} - -FUNCTION {format.eid} -{ eid empty$ - { "" } - { "art." eid tie.or.space.connect } - if$ -} - -FUNCTION {format.vol.num.pages} -{ volume field.or.null - number empty$ - 'skip$ - { "\penalty0 (" number * ")" * * - volume empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - } - if$ - pages empty$ - 'skip$ - { duplicate$ empty$ - { pop$ format.pages } - { ":\penalty0 " * pages n.dashify * } - if$ - } - if$ -} - -FUNCTION {format.vol.num.eid} -{ volume field.or.null - number empty$ - 'skip$ - { "\penalty0 (" number * ")" * * - volume empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - } - if$ - eid empty$ - 'skip$ - { duplicate$ empty$ - { pop$ format.eid } - { ":\penalty0 " * eid * } - if$ - } - if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { "chapter" } - { type "l" change.case$ } - if$ - chapter tie.or.space.connect - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - -FUNCTION {format.in.ed.booktitle} -{ booktitle empty$ - { "" } - { editor empty$ - { "In " booktitle emphasize * } - { "In " format.editors * ", " * booktitle emphasize * } - if$ - } - if$ -} - -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - key empty$ not and - { "all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} - -FUNCTION {format.thesis.type} -{ type empty$ - 'skip$ - { pop$ - type "t" change.case$ - } - if$ -} - -FUNCTION {format.tr.number} -{ type empty$ - { "Technical Report" } - 'type - if$ - number empty$ - { "t" change.case$ } - { number tie.or.space.connect } - if$ -} - -FUNCTION {format.article.crossref} -{ key empty$ - { journal empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * - warning$ - "" - } - { "In \emph{" journal * "}" * } - if$ - } - { "In " } - if$ - " \citet{" * crossref * "}" * -} - -FUNCTION {format.book.crossref} -{ volume empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - "In " - } - { "Volume" volume tie.or.space.connect - " of " * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { "\emph{" * series * "}" * } - if$ - } - 'skip$ - if$ - } - 'skip$ - if$ - " \citet{" * crossref * "}" * -} - -FUNCTION {format.incoll.inproc.crossref} -{ editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { booktitle empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - "" - } - { "In \emph{" booktitle * "}" * } - if$ - } - { "In " } - if$ - } - { "In " } - if$ - " \citet{" * crossref * "}" * -} - -FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - author format.key output - new.block - format.title "title" output.check - new.block - crossref missing$ - { journal emphasize "journal" output.check - eid empty$ - { format.vol.num.pages output } - { format.vol.num.eid output } - if$ - format.date "year" output.check - } - { format.article.crossref output.nonnull - eid empty$ - { format.pages output } - { format.eid output } - if$ - } - if$ - format.issn output - format.doi output - format.url output - new.block - note output - fin.entry -} - -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check - editor format.key output - } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - format.isbn output - format.doi output - format.url output - new.block - note output - fin.entry -} - -FUNCTION {booklet} -{ output.bibitem - format.authors output - author format.key output - new.block - format.title "title" output.check - howpublished address new.block.checkb - howpublished output - address output - format.date output - format.isbn output - format.doi output - format.url output - new.block - note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check - editor format.key output - } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - format.isbn output - format.doi output - format.url output - new.block - note output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - author format.key output - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.chapter.pages output - new.sentence - publisher "publisher" output.check - address output - format.edition output - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ - format.isbn output - format.doi output - format.url output - new.block - note output - fin.entry -} - -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - author format.key output - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.pages output - address empty$ - { organization publisher new.sentence.checkb - organization output - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - organization output - publisher output - } - if$ - } - { format.incoll.inproc.crossref output.nonnull - format.pages output - } - if$ - format.isbn output - format.doi output - format.url output - new.block - note output - fin.entry -} - -FUNCTION {conference} { inproceedings } - -FUNCTION {manual} -{ output.bibitem - format.authors output - author format.key output - new.block - format.btitle "title" output.check - organization address new.block.checkb - organization output - address output - format.edition output - format.date output - format.url output - new.block - note output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - author format.key output - new.block - format.title "title" output.check - new.block - "Master's thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - format.url output - new.block - note output - fin.entry -} - -FUNCTION {misc} -{ output.bibitem - format.authors output - author format.key output - title howpublished new.block.checkb - format.title output - howpublished new.block.checka - howpublished output - format.date output - format.issn output - format.url output - new.block - note output - fin.entry - empty.misc.check -} - -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - author format.key output - new.block - format.btitle "title" output.check - new.block - "PhD thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - format.url output - new.block - note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - format.editors output - editor format.key output - new.block - format.btitle "title" output.check - format.bvolume output - format.number.series output - address output - format.date "year" output.check - new.sentence - organization output - publisher output - format.isbn output - format.doi output - format.url output - new.block - note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - author format.key output - new.block - format.title "title" output.check - new.block - format.tr.number output.nonnull - institution "institution" output.check - address output - format.date "year" output.check - format.url output - new.block - note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - author format.key output - new.block - format.title "title" output.check - new.block - note "note" output.check - format.date output - format.url output - fin.entry -} - -FUNCTION {default.type} { misc } - - -MACRO {jan} {"January"} - -MACRO {feb} {"February"} - -MACRO {mar} {"March"} - -MACRO {apr} {"April"} - -MACRO {may} {"May"} - -MACRO {jun} {"June"} - -MACRO {jul} {"July"} - -MACRO {aug} {"August"} - -MACRO {sep} {"September"} - -MACRO {oct} {"October"} - -MACRO {nov} {"November"} - -MACRO {dec} {"December"} - - - -MACRO {acmcs} {"ACM Computing Surveys"} - -MACRO {acta} {"Acta Informatica"} - -MACRO {cacm} {"Communications of the ACM"} - -MACRO {ibmjrd} {"IBM Journal of Research and Development"} - -MACRO {ibmsj} {"IBM Systems Journal"} - -MACRO {ieeese} {"IEEE Transactions on Software Engineering"} - -MACRO {ieeetc} {"IEEE Transactions on Computers"} - -MACRO {ieeetcad} - {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"} - -MACRO {ipl} {"Information Processing Letters"} - -MACRO {jacm} {"Journal of the ACM"} - -MACRO {jcss} {"Journal of Computer and System Sciences"} - -MACRO {scp} {"Science of Computer Programming"} - -MACRO {sicomp} {"SIAM Journal on Computing"} - -MACRO {tocs} {"ACM Transactions on Computer Systems"} - -MACRO {tods} {"ACM Transactions on Database Systems"} - -MACRO {tog} {"ACM Transactions on Graphics"} - -MACRO {toms} {"ACM Transactions on Mathematical Software"} - -MACRO {toois} {"ACM Transactions on Office Information Systems"} - -MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"} - -MACRO {tcs} {"Theoretical Computer Science"} - - -READ - -FUNCTION {sortify} -{ purify$ - "l" change.case$ -} - -INTEGERS { len } - -FUNCTION {chop.word} -{ 's := - 'len := - s #1 len substring$ = - { s len #1 + global.max$ substring$ } - 's - if$ -} - -FUNCTION {format.lab.names} -{ 's := - s #1 "{vv~}{ll}" format.name$ - s num.names$ duplicate$ - #2 > - { pop$ " et~al." * } - { #2 < - 'skip$ - { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = - { " et~al." * } - { " \& " * s #2 "{vv~}{ll}" format.name$ * } - if$ - } - if$ - } - if$ -} - -FUNCTION {author.key.label} -{ author empty$ - { key empty$ - { cite$ #1 #3 substring$ } - 'key - if$ - } - { author format.lab.names } - if$ -} - -FUNCTION {author.editor.key.label} -{ author empty$ - { editor empty$ - { key empty$ - { cite$ #1 #3 substring$ } - 'key - if$ - } - { editor format.lab.names } - if$ - } - { author format.lab.names } - if$ -} - -FUNCTION {author.key.organization.label} -{ author empty$ - { key empty$ - { organization empty$ - { cite$ #1 #3 substring$ } - { "The " #4 organization chop.word #3 text.prefix$ } - if$ - } - 'key - if$ - } - { author format.lab.names } - if$ -} - -FUNCTION {editor.key.organization.label} -{ editor empty$ - { key empty$ - { organization empty$ - { cite$ #1 #3 substring$ } - { "The " #4 organization chop.word #3 text.prefix$ } - if$ - } - 'key - if$ - } - { editor format.lab.names } - if$ -} - -FUNCTION {calc.short.authors} -{ type$ "book" = - type$ "inbook" = - or - 'author.editor.key.label - { type$ "proceedings" = - 'editor.key.organization.label - { type$ "manual" = - 'author.key.organization.label - 'author.key.label - if$ - } - if$ - } - if$ - 'short.list := -} - -FUNCTION {calc.label} -{ calc.short.authors - short.list - "(" - * - year duplicate$ empty$ - short.list key field.or.null = or - { pop$ "" } - 'skip$ - if$ - * - 'label := -} - -FUNCTION {sort.format.names} -{ 's := - #1 'nameptr := - "" - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { - s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't := - nameptr #1 > - { - " " * - namesleft #1 = t "others" = and - { "zzzzz" * } - { numnames #2 > nameptr #2 = and - { "zz" * year field.or.null * " " * } - 'skip$ - if$ - t sortify * - } - if$ - } - { t sortify * } - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {sort.format.title} -{ 't := - "A " #2 - "An " #3 - "The " #4 t chop.word - chop.word - chop.word - sortify - #1 global.max$ substring$ -} - -FUNCTION {author.sort} -{ author empty$ - { key empty$ - { "to sort, need author or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.editor.sort} -{ author empty$ - { editor empty$ - { key empty$ - { "to sort, need author, editor, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { editor sort.format.names } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.organization.sort} -{ author empty$ - { organization empty$ - { key empty$ - { "to sort, need author, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {editor.organization.sort} -{ editor empty$ - { organization empty$ - { key empty$ - { "to sort, need editor, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { editor sort.format.names } - if$ -} - - -FUNCTION {presort} -{ calc.label - label sortify - " " - * - type$ "book" = - type$ "inbook" = - or - 'author.editor.sort - { type$ "proceedings" = - 'editor.organization.sort - { type$ "manual" = - 'author.organization.sort - 'author.sort - if$ - } - if$ - } - if$ - " " - * - year field.or.null sortify - * - " " - * - cite$ - * - #1 entry.max$ substring$ - 'sort.label := - sort.label * - #1 entry.max$ substring$ - 'sort.key$ := -} - -ITERATE {presort} - -SORT - -STRINGS { longest.label last.label next.extra } - -INTEGERS { longest.label.width last.extra.num number.label } - -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #0 int.to.chr$ 'last.label := - "" 'next.extra := - #0 'longest.label.width := - #0 'last.extra.num := - #0 'number.label := -} - -FUNCTION {forward.pass} -{ last.label label = - { last.extra.num #1 + 'last.extra.num := - last.extra.num int.to.chr$ 'extra.label := - } - { "a" chr.to.int$ 'last.extra.num := - "" 'extra.label := - label 'last.label := - } - if$ - number.label #1 + 'number.label := -} - -FUNCTION {reverse.pass} -{ next.extra "b" = - { "a" 'extra.label := } - 'skip$ - if$ - extra.label 'next.extra := - extra.label - duplicate$ empty$ - 'skip$ - { "{\natexlab{" swap$ * "}}" * } - if$ - 'extra.label := - label extra.label * 'label := -} - -EXECUTE {initialize.longest.label} - -ITERATE {forward.pass} - -REVERSE {reverse.pass} - -FUNCTION {bib.sort.order} -{ sort.label 'sort.key$ := -} - -ITERATE {bib.sort.order} - -SORT - -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" number.label int.to.str$ * "}" * - write$ newline$ - "\providecommand{\natexlab}[1]{#1}" - write$ newline$ - "\providecommand{\url}[1]{\texttt{#1}}" - write$ newline$ - "\expandafter\ifx\csname urlstyle\endcsname\relax" - write$ newline$ - " \providecommand{\doi}[1]{doi: #1}\else" - write$ newline$ - " \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi" - write$ newline$ -} - -EXECUTE {begin.bib} - -EXECUTE {init.state.consts} - -ITERATE {call.type$} - -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} - -EXECUTE {end.bib} diff --git a/outputs/outputs_20230421_000752/iclr2022_conference.sty b/outputs/outputs_20230421_000752/iclr2022_conference.sty deleted file mode 100644 index 03c8b38954cb906fc1526e692b6757c5fda87a98..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/iclr2022_conference.sty +++ /dev/null @@ -1,245 +0,0 @@ -%%%% ICLR Macros (LaTex) -%%%% Adapted by Hugo Larochelle from the NIPS stylefile Macros -%%%% Style File -%%%% Dec 12, 1990 Rev Aug 14, 1991; Sept, 1995; April, 1997; April, 1999; October 2014 - -% This file can be used with Latex2e whether running in main mode, or -% 2.09 compatibility mode. -% -% If using main mode, you need to include the commands -% \documentclass{article} -% \usepackage{iclr14submit_e,times} -% - -% Change the overall width of the page. If these parameters are -% changed, they will require corresponding changes in the -% maketitle section. -% -\usepackage{eso-pic} % used by \AddToShipoutPicture -\RequirePackage{fancyhdr} -\RequirePackage{natbib} - -% modification to natbib citations -\setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}} - -\renewcommand{\topfraction}{0.95} % let figure take up nearly whole page -\renewcommand{\textfraction}{0.05} % let figure take up nearly whole page - -% Define iclrfinal, set to true if iclrfinalcopy is defined -\newif\ificlrfinal -\iclrfinalfalse -\def\iclrfinalcopy{\iclrfinaltrue} -\font\iclrtenhv = phvb at 8pt - -% Specify the dimensions of each page - -\setlength{\paperheight}{11in} -\setlength{\paperwidth}{8.5in} - - -\oddsidemargin .5in % Note \oddsidemargin = \evensidemargin -\evensidemargin .5in -\marginparwidth 0.07 true in -%\marginparwidth 0.75 true in -%\topmargin 0 true pt % Nominal distance from top of page to top of -%\topmargin 0.125in -\topmargin -0.625in -\addtolength{\headsep}{0.25in} -\textheight 9.0 true in % Height of text (including footnotes & figures) -\textwidth 5.5 true in % Width of text line. -\widowpenalty=10000 -\clubpenalty=10000 - -% \thispagestyle{empty} \pagestyle{empty} -\flushbottom \sloppy - -% We're never going to need a table of contents, so just flush it to -% save space --- suggested by drstrip@sandia-2 -\def\addcontentsline#1#2#3{} - -% Title stuff, taken from deproc. -\def\maketitle{\par -\begingroup - \def\thefootnote{\fnsymbol{footnote}} - \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} % for perfect author - % name centering -% The footnote-mark was overlapping the footnote-text, -% added the following to fix this problem (MK) - \long\def\@makefntext##1{\parindent 1em\noindent - \hbox to1.8em{\hss $\m@th ^{\@thefnmark}$}##1} - \@maketitle \@thanks -\endgroup -\setcounter{footnote}{0} -\let\maketitle\relax \let\@maketitle\relax -\gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax} - -% The toptitlebar has been raised to top-justify the first page - -\usepackage{fancyhdr} -\pagestyle{fancy} -\fancyhead{} - -% Title (includes both anonimized and non-anonimized versions) -\def\@maketitle{\vbox{\hsize\textwidth -%\linewidth\hsize \vskip 0.1in \toptitlebar \centering -{\LARGE\sc \@title\par} -%\bottomtitlebar % \vskip 0.1in % minus -\ificlrfinal - \lhead{Published as a conference paper at ICLR 2022} - \def\And{\end{tabular}\hfil\linebreak[0]\hfil - \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}% - \def\AND{\end{tabular}\hfil\linebreak[4]\hfil - \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}% - \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\@author\end{tabular}% -\else - \lhead{Under review as a conference paper at ICLR 2022} - \def\And{\end{tabular}\hfil\linebreak[0]\hfil - \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}% - \def\AND{\end{tabular}\hfil\linebreak[4]\hfil - \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}% - \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}Anonymous authors\\Paper under double-blind review\end{tabular}% -\fi -\vskip 0.3in minus 0.1in}} - -\renewenvironment{abstract}{\vskip.075in\centerline{\large\sc -Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex} - -% sections with less space -\def\section{\@startsection {section}{1}{\z@}{-2.0ex plus - -0.5ex minus -.2ex}{1.5ex plus 0.3ex -minus0.2ex}{\large\sc\raggedright}} - -\def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus --0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\sc\raggedright}} -\def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex -plus -0.5ex minus -.2ex}{0.5ex plus -.2ex}{\normalsize\sc\raggedright}} -\def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus -0.5ex minus .2ex}{-1em}{\normalsize\bf}} -\def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus - 0.5ex minus .2ex}{-1em}{\normalsize\sc}} -\def\subsubsubsection{\vskip -5pt{\noindent\normalsize\rm\raggedright}} - - -% Footnotes -\footnotesep 6.65pt % -\skip\footins 9pt plus 4pt minus 2pt -\def\footnoterule{\kern-3pt \hrule width 12pc \kern 2.6pt } -\setcounter{footnote}{0} - -% Lists and paragraphs -\parindent 0pt -\topsep 4pt plus 1pt minus 2pt -\partopsep 1pt plus 0.5pt minus 0.5pt -\itemsep 2pt plus 1pt minus 0.5pt -\parsep 2pt plus 1pt minus 0.5pt -\parskip .5pc - - -%\leftmargin2em -\leftmargin3pc -\leftmargini\leftmargin \leftmarginii 2em -\leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em - -%\labelsep \labelsep 5pt - -\def\@listi{\leftmargin\leftmargini} -\def\@listii{\leftmargin\leftmarginii - \labelwidth\leftmarginii\advance\labelwidth-\labelsep - \topsep 2pt plus 1pt minus 0.5pt - \parsep 1pt plus 0.5pt minus 0.5pt - \itemsep \parsep} -\def\@listiii{\leftmargin\leftmarginiii - \labelwidth\leftmarginiii\advance\labelwidth-\labelsep - \topsep 1pt plus 0.5pt minus 0.5pt - \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt - \itemsep \topsep} -\def\@listiv{\leftmargin\leftmarginiv - \labelwidth\leftmarginiv\advance\labelwidth-\labelsep} -\def\@listv{\leftmargin\leftmarginv - \labelwidth\leftmarginv\advance\labelwidth-\labelsep} -\def\@listvi{\leftmargin\leftmarginvi - \labelwidth\leftmarginvi\advance\labelwidth-\labelsep} - -\abovedisplayskip 7pt plus2pt minus5pt% -\belowdisplayskip \abovedisplayskip -\abovedisplayshortskip 0pt plus3pt% -\belowdisplayshortskip 4pt plus3pt minus3pt% - -% Less leading in most fonts (due to the narrow columns) -% The choices were between 1-pt and 1.5-pt leading -%\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} % got rid of @ (MK) -\def\normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} -\def\small{\@setsize\small{10pt}\ixpt\@ixpt} -\def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt} -\def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt} -\def\tiny{\@setsize\tiny{7pt}\vipt\@vipt} -\def\large{\@setsize\large{14pt}\xiipt\@xiipt} -\def\Large{\@setsize\Large{16pt}\xivpt\@xivpt} -\def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt} -\def\huge{\@setsize\huge{23pt}\xxpt\@xxpt} -\def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt} - -\def\toptitlebar{\hrule height4pt\vskip .25in\vskip-\parskip} - -\def\bottomtitlebar{\vskip .29in\vskip-\parskip\hrule height1pt\vskip -.09in} % -%Reduced second vskip to compensate for adding the strut in \@author - - -%% % Vertical Ruler -%% % This code is, largely, from the CVPR 2010 conference style file -%% % ----- define vruler -%% \makeatletter -%% \newbox\iclrrulerbox -%% \newcount\iclrrulercount -%% \newdimen\iclrruleroffset -%% \newdimen\cv@lineheight -%% \newdimen\cv@boxheight -%% \newbox\cv@tmpbox -%% \newcount\cv@refno -%% \newcount\cv@tot -%% % NUMBER with left flushed zeros \fillzeros[] -%% \newcount\cv@tmpc@ \newcount\cv@tmpc -%% \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi -%% \cv@tmpc=1 % -%% \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi -%% \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat -%% \ifnum#2<0\advance\cv@tmpc1\relax-\fi -%% \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat -%% \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}% -%% % \makevruler[][][][][] -%% \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip -%% \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt% -%% \global\setbox\iclrrulerbox=\vbox to \textheight{% -%% {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight -%% \cv@lineheight=#1\global\iclrrulercount=#2% -%% \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2% -%% \cv@refno1\vskip-\cv@lineheight\vskip1ex% -%% \loop\setbox\cv@tmpbox=\hbox to0cm{{\iclrtenhv\hfil\fillzeros[#4]\iclrrulercount}}% -%% \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break -%% \advance\cv@refno1\global\advance\iclrrulercount#3\relax -%% \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}% -%% \makeatother -%% % ----- end of vruler - -%% % \makevruler[][][][][] -%% \def\iclrruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\iclrrulerbox}} -%% \AddToShipoutPicture{% -%% \ificlrfinal\else -%% \iclrruleroffset=\textheight -%% \advance\iclrruleroffset by -3.7pt -%% \color[rgb]{.7,.7,.7} -%% \AtTextUpperLeft{% -%% \put(\LenToUnit{-35pt},\LenToUnit{-\iclrruleroffset}){%left ruler -%% \iclrruler{\iclrrulercount}} -%% } -%% \fi -%% } -%%% To add a vertical bar on the side -%\AddToShipoutPicture{ -%\AtTextLowerLeft{ -%\hspace*{-1.8cm} -%\colorbox[rgb]{0.7,0.7,0.7}{\small \parbox[b][\textheight]{0.1cm}{}}} -%} diff --git a/outputs/outputs_20230421_000752/introduction.tex b/outputs/outputs_20230421_000752/introduction.tex deleted file mode 100644 index 676ecd9d0dbeda17c405c14d0b9cac304b0855d2..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/introduction.tex +++ /dev/null @@ -1,10 +0,0 @@ -\section{introduction} -Reinforcement Learning (RL) has emerged as a significant research area in the field of artificial intelligence, with a wide range of applications in robotics, finance, healthcare, and gaming \cite{2108.11510}. The primary goal of RL is to develop algorithms that allow agents to learn optimal policies through interaction with their environment, maximizing the cumulative reward over time \cite{1708.05866}. Despite the considerable progress made in recent years, RL still faces several challenges, such as the trade-off between exploration and exploitation, the curse of dimensionality, and the need for efficient algorithms that can handle large-scale and complex problems \cite{1906.10025}. - -One of the major breakthroughs in RL has been the development of Q-learning algorithms, which have been proven to converge to the optimal solution \cite{2303.08631}. However, Q-learning is known to suffer from overestimation bias, leading to suboptimal performance and slow convergence in some cases \cite{2106.14642}. To address this issue, researchers have proposed various modifications and extensions to Q-learning, such as Double Q-learning \cite{1511.02377} and Self-correcting Q-learning \cite{2012.01100}, which aim to mitigate the overestimation bias while maintaining convergence guarantees. - -Another essential aspect of RL research is the incorporation of deep learning techniques, giving rise to the field of Deep Reinforcement Learning (DRL) \cite{1709.05067}. DRL has demonstrated remarkable success in various domains, such as playing video games directly from pixels and learning control policies for robots \cite{1708.05866}. However, DRL algorithms often require a large amount of data and computational resources, which limits their applicability in real-world scenarios \cite{1906.10025}. To overcome these limitations, researchers have proposed various approaches, including distributed DRL \cite{2212.00253} and expert-guided DRL \cite{2106.14642}, which aim to improve the sample efficiency and scalability of DRL algorithms. - -Related work in the field of RL has also focused on the development of policy gradient methods, which optimize the policy directly by following the gradient of the expected return \cite{1811.09013}. These methods have been particularly successful in continuous action settings and have led to the development of algorithms such as Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. However, policy gradient methods often require on-policy data, which can be inefficient in terms of sample complexity \cite{1911.04817}. - -In summary, this survey aims to provide a comprehensive overview of the current state of Reinforcement Learning, focusing on the challenges and recent advances in Q-learning, Deep Reinforcement Learning, and policy gradient methods. By examining the key algorithms, techniques, and applications in these areas, we hope to shed light on the current limitations and future research directions in the field of RL. \ No newline at end of file diff --git a/outputs/outputs_20230421_000752/main.aux b/outputs/outputs_20230421_000752/main.aux deleted file mode 100644 index 7360727da4a71b873394a12f2344b9cbb88f4959..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/main.aux +++ /dev/null @@ -1,92 +0,0 @@ -\relax -\providecommand\hyper@newdestlabel[2]{} -\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument} -\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined -\global\let\oldcontentsline\contentsline -\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}} -\global\let\oldnewlabel\newlabel -\gdef\newlabel#1#2{\newlabelxx{#1}#2} -\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}} -\AtEndDocument{\ifx\hyper@anchor\@undefined -\let\contentsline\oldcontentsline -\let\newlabel\oldnewlabel -\fi} -\fi} -\global\let\hyper@last\relax -\gdef\HyperFirstAtBeginDocument#1{#1} -\providecommand\HyField@AuxAddToFields[1]{} -\providecommand\HyField@AuxAddToCoFields[2]{} -\citation{2108.11510} -\citation{1708.05866} -\citation{1906.10025} -\citation{2303.08631} -\citation{2106.14642} -\citation{1511.02377} -\citation{2012.01100} -\citation{1709.05067} -\citation{1708.05866} -\citation{1906.10025} -\citation{2212.00253} -\citation{2106.14642} -\citation{1811.09013} -\citation{2209.01820} -\citation{1911.04817} -\citation{1512.07669} -\citation{1511.02377} -\citation{1512.09075} -\citation{2008.10426} -\citation{0711.2185} -\@writefile{toc}{\contentsline {section}{\numberline {1}introduction}{1}{section.1}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {2}related works}{1}{section.2}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Markov Decision Processes:}{1}{section*.1}\protected@file@percent } -\citation{2303.08631} -\citation{2303.08631} -\citation{2012.01100} -\citation{2106.14642} -\citation{2209.01820} -\citation{1811.09013} -\citation{2108.11510} -\citation{1708.05866} -\citation{1906.10025} -\citation{2111.01334} -\citation{1512.09075} -\citation{1511.02377} -\citation{1512.07669} -\@writefile{toc}{\contentsline {paragraph}{Q-Learning and Variants:}{2}{section*.2}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Expert Q-Learning:}{2}{section*.3}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Policy Gradient Methods:}{2}{section*.4}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Deep Reinforcement Learning:}{2}{section*.5}\protected@file@percent } -\@writefile{toc}{\contentsline {paragraph}{Temporal Networks:}{2}{section*.6}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {3}backgrounds}{2}{section.3}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Problem Statement and Foundational Concepts}{2}{subsection.3.1}\protected@file@percent } -\citation{2303.08631} -\citation{2303.08631} -\citation{2106.14642} -\citation{2303.08631} -\citation{2106.14642} -\citation{1703.02102} -\citation{1811.09013} -\citation{2209.01820} -\bibdata{ref} -\bibcite{0711.2185}{{1}{2007}{{Arie~Leizarowitz}}{{}}} -\bibcite{2303.08631}{{2}{2023}{{Barber}}{{}}} -\bibcite{1811.09013}{{3}{2018}{{Ehsan~Imani}}{{}}} -\bibcite{1511.02377}{{4}{2015}{{Ehud~Lehrer}}{{}}} -\bibcite{1708.05866}{{5}{2017}{{Kai~Arulkumaran}}{{}}} -\bibcite{1512.07669}{{6}{2015}{{Krishnamurthy}}{{}}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Q-Learning and Related Algorithms}{3}{subsection.3.2}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Policy Gradient Methods}{3}{subsection.3.3}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Methodology and Evaluation Metrics}{3}{subsection.3.4}\protected@file@percent } -\bibcite{1911.04817}{{7}{2019}{{Kämmerer}}{{}}} -\bibcite{2106.14642}{{8}{2021}{{Li~Meng}}{{}}} -\bibcite{1709.05067}{{9}{2017}{{Mahipal~Jadeja}}{{}}} -\bibcite{2008.10426}{{10}{2020}{{Nathalie~Bertrand}}{{}}} -\bibcite{2108.11510}{{11}{2021}{{Ngan~Le}}{{}}} -\bibcite{1512.09075}{{12}{2015}{{Philip S.~Thomas}}{{}}} -\bibcite{2212.00253}{{13}{2022}{{Qiyue~Yin}}{{}}} -\bibcite{2012.01100}{{14}{2020}{{Rong~Zhu}}{{}}} -\bibcite{1906.10025}{{15}{2019}{{Sergey~Ivanov}}{{}}} -\bibcite{2209.01820}{{16}{2022}{{van Heeswijk}}{{}}} -\bibcite{2111.01334}{{17}{2021}{{Xiu-Xiu~Zhan}}{{}}} -\bibcite{1703.02102}{{18}{2017}{{Yemi~Okesanjo}}{{}}} -\bibstyle{iclr2022_conference} diff --git a/outputs/outputs_20230421_000752/main.bbl b/outputs/outputs_20230421_000752/main.bbl deleted file mode 100644 index 65120bc9a72092c489da3ad357cc2b02c0c94268..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/main.bbl +++ /dev/null @@ -1,122 +0,0 @@ -\begin{thebibliography}{18} -\providecommand{\natexlab}[1]{#1} -\providecommand{\url}[1]{\texttt{#1}} -\expandafter\ifx\csname urlstyle\endcsname\relax - \providecommand{\doi}[1]{doi: #1}\else - \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi - -\bibitem[Arie~Leizarowitz(2007)]{0711.2185} -Adam~Shwartz Arie~Leizarowitz. -\newblock Exact finite approximations of average-cost countable markov decision - processes. -\newblock \emph{arXiv preprint arXiv:0711.2185}, 2007. -\newblock URL \url{http://arxiv.org/abs/0711.2185v1}. - -\bibitem[Barber(2023)]{2303.08631} -David Barber. -\newblock Smoothed q-learning. -\newblock \emph{arXiv preprint arXiv:2303.08631}, 2023. -\newblock URL \url{http://arxiv.org/abs/2303.08631v1}. - -\bibitem[Ehsan~Imani(2018)]{1811.09013} -Martha~White Ehsan~Imani, Eric~Graves. -\newblock An off-policy policy gradient theorem using emphatic weightings. -\newblock \emph{arXiv preprint arXiv:1811.09013}, 2018. -\newblock URL \url{http://arxiv.org/abs/1811.09013v2}. - -\bibitem[Ehud~Lehrer(2015)]{1511.02377} -Omri N.~Solan Ehud~Lehrer, Eilon~Solan. -\newblock The value functions of markov decision processes. -\newblock \emph{arXiv preprint arXiv:1511.02377}, 2015. -\newblock URL \url{http://arxiv.org/abs/1511.02377v1}. - -\bibitem[Kai~Arulkumaran(2017)]{1708.05866} -Miles Brundage Anil Anthony~Bharath Kai~Arulkumaran, Marc Peter~Deisenroth. -\newblock A brief survey of deep reinforcement learning. -\newblock \emph{arXiv preprint arXiv:1708.05866}, 2017. -\newblock URL \url{http://arxiv.org/abs/1708.05866v2}. - -\bibitem[Krishnamurthy(2015)]{1512.07669} -Vikram Krishnamurthy. -\newblock Reinforcement learning: Stochastic approximation algorithms for - markov decision processes. -\newblock \emph{arXiv preprint arXiv:1512.07669}, 2015. -\newblock URL \url{http://arxiv.org/abs/1512.07669v1}. - -\bibitem[Kämmerer(2019)]{1911.04817} -Mattis~Manfred Kämmerer. -\newblock On policy gradients. -\newblock \emph{arXiv preprint arXiv:1911.04817}, 2019. -\newblock URL \url{http://arxiv.org/abs/1911.04817v1}. - -\bibitem[Li~Meng(2021)]{2106.14642} -Morten Goodwin Paal~Engelstad Li~Meng, Anis~Yazidi. -\newblock Expert q-learning: Deep reinforcement learning with coarse state - values from offline expert examples. -\newblock \emph{arXiv preprint arXiv:2106.14642}, 2021. -\newblock URL \url{http://arxiv.org/abs/2106.14642v3}. - -\bibitem[Mahipal~Jadeja(2017)]{1709.05067} -Agam~Shah Mahipal~Jadeja, Neelanshi~Varia. -\newblock Deep reinforcement learning for conversational ai. -\newblock \emph{arXiv preprint arXiv:1709.05067}, 2017. -\newblock URL \url{http://arxiv.org/abs/1709.05067v1}. - -\bibitem[Nathalie~Bertrand(2020)]{2008.10426} -Thomas Brihaye Paulin~Fournier Nathalie~Bertrand, Patricia~Bouyer. -\newblock Taming denumerable markov decision processes with decisiveness. -\newblock \emph{arXiv preprint arXiv:2008.10426}, 2020. -\newblock URL \url{http://arxiv.org/abs/2008.10426v1}. - -\bibitem[Ngan~Le(2021)]{2108.11510} -Kashu Yamazaki Khoa Luu Marios~Savvides Ngan~Le, Vidhiwar Singh~Rathour. -\newblock Deep reinforcement learning in computer vision: A comprehensive - survey. -\newblock \emph{arXiv preprint arXiv:2108.11510}, 2021. -\newblock URL \url{http://arxiv.org/abs/2108.11510v1}. - -\bibitem[Philip S.~Thomas(2015)]{1512.09075} -Billy~Okal Philip S.~Thomas. -\newblock A notation for markov decision processes. -\newblock \emph{arXiv preprint arXiv:1512.09075}, 2015. -\newblock URL \url{http://arxiv.org/abs/1512.09075v2}. - -\bibitem[Qiyue~Yin(2022)]{2212.00253} -Shengqi Shen Jun Yang Meijing Zhao Kaiqi Huang Bin Liang Liang~Wang Qiyue~Yin, - Tongtong~Yu. -\newblock Distributed deep reinforcement learning: A survey and a multi-player - multi-agent learning toolbox. -\newblock \emph{arXiv preprint arXiv:2212.00253}, 2022. -\newblock URL \url{http://arxiv.org/abs/2212.00253v1}. - -\bibitem[Rong~Zhu(2020)]{2012.01100} -Mattia~Rigotti Rong~Zhu. -\newblock Self-correcting q-learning. -\newblock \emph{arXiv preprint arXiv:2012.01100}, 2020. -\newblock URL \url{http://arxiv.org/abs/2012.01100v2}. - -\bibitem[Sergey~Ivanov(2019)]{1906.10025} -Alexander~D'yakonov Sergey~Ivanov. -\newblock Modern deep reinforcement learning algorithms. -\newblock \emph{arXiv preprint arXiv:1906.10025}, 2019. -\newblock URL \url{http://arxiv.org/abs/1906.10025v2}. - -\bibitem[van Heeswijk(2022)]{2209.01820} -W.~J.~A. van Heeswijk. -\newblock Natural policy gradients in reinforcement learning explained. -\newblock \emph{arXiv preprint arXiv:2209.01820}, 2022. -\newblock URL \url{http://arxiv.org/abs/2209.01820v1}. - -\bibitem[Xiu-Xiu~Zhan(2021)]{2111.01334} -Zhipeng Wang Huijuang Wang Petter Holme Zi-Ke~Zhang Xiu-Xiu~Zhan, Chuang~Liu. -\newblock Measuring and utilizing temporal network dissimilarity. -\newblock \emph{arXiv preprint arXiv:2111.01334}, 2021. -\newblock URL \url{http://arxiv.org/abs/2111.01334v1}. - -\bibitem[Yemi~Okesanjo(2017)]{1703.02102} -Victor~Kofia Yemi~Okesanjo. -\newblock Revisiting stochastic off-policy action-value gradients. -\newblock \emph{arXiv preprint arXiv:1703.02102}, 2017. -\newblock URL \url{http://arxiv.org/abs/1703.02102v2}. - -\end{thebibliography} diff --git a/outputs/outputs_20230421_000752/main.blg b/outputs/outputs_20230421_000752/main.blg deleted file mode 100644 index 7aed855ec3f86efc68093726e903c30045c77699..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/main.blg +++ /dev/null @@ -1,935 +0,0 @@ -This is BibTeX, Version 0.99d (TeX Live 2019/W32TeX) -Capacity: max_strings=200000, hash_size=200000, hash_prime=170003 -The top-level auxiliary file: main.aux -The style file: iclr2022_conference.bst -Database file #1: ref.bib -Repeated entry---line 19 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 53 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 71 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 103 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 121 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 137 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 169 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 187 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 203 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 219 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 253 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 271 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 287 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 303 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 319 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 353 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 371 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 387 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 403 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 419 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 437 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 471 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 489 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 505 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 521 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 537 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 555 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 571 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 605 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 623 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 639 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 655 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 671 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 689 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 705 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 755 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 773 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 789 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 805 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 821 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 839 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 855 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 889 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 921 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 939 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 955 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 971 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 987 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1005 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1021 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1055 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1071 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1103 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1121 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1137 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1153 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1169 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1187 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1203 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1237 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1253 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1269 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1301 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1319 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1335 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1351 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1367 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1385 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1401 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1435 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1451 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1467 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1483 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1515 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1533 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1549 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1565 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1581 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1599 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1615 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1649 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1665 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1681 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1697 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1713 of file ref.bib - : @article{1911.04817 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1745 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1763 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1779 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1795 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1811 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1829 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1845 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1879 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1895 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1911 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1927 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1943 of file ref.bib - : @article{1911.04817 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1959 of file ref.bib - : @article{2108.11510 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 1993 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2011 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2027 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2043 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2059 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2077 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2093 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2127 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2143 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2159 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2175 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2191 of file ref.bib - : @article{1911.04817 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2207 of file ref.bib - : @article{2108.11510 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2223 of file ref.bib - : @article{2212.00253 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2257 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2275 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2291 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2307 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2323 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2341 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2357 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2391 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2407 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2423 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2439 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2455 of file ref.bib - : @article{1911.04817 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2471 of file ref.bib - : @article{2108.11510 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2487 of file ref.bib - : @article{2212.00253 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2505 of file ref.bib - : @article{1709.05067 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2537 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2555 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2571 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2587 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2603 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2621 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2637 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2671 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2687 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2703 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2719 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2735 of file ref.bib - : @article{1911.04817 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2751 of file ref.bib - : @article{2108.11510 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2767 of file ref.bib - : @article{2212.00253 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2785 of file ref.bib - : @article{1709.05067 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2801 of file ref.bib - : @article{1708.05866 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2833 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2851 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2867 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2883 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2899 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2917 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2933 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2967 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2983 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 2999 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3015 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3031 of file ref.bib - : @article{1911.04817 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3047 of file ref.bib - : @article{2108.11510 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3063 of file ref.bib - : @article{2212.00253 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3081 of file ref.bib - : @article{1709.05067 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3097 of file ref.bib - : @article{1708.05866 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3113 of file ref.bib - : @article{1906.10025 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3145 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3163 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3179 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3195 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3211 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3229 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3245 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3279 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3295 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3311 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3327 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3343 of file ref.bib - : @article{1911.04817 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3359 of file ref.bib - : @article{2108.11510 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3375 of file ref.bib - : @article{2212.00253 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3393 of file ref.bib - : @article{1709.05067 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3409 of file ref.bib - : @article{1708.05866 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3425 of file ref.bib - : @article{1906.10025 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3441 of file ref.bib - : @article{2111.01334 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3473 of file ref.bib - : @article{1512.07669 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3491 of file ref.bib - : @article{1511.02377 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3507 of file ref.bib - : @article{1512.09075 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3523 of file ref.bib - : @article{2008.10426 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3539 of file ref.bib - : @article{0711.2185 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3557 of file ref.bib - : @article{2303.08631 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3573 of file ref.bib - : @article{2106.14642 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3607 of file ref.bib - : @article{2012.01100 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3623 of file ref.bib - : @article{1703.02102 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3639 of file ref.bib - : @article{2209.01820 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3655 of file ref.bib - : @article{1811.09013 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3671 of file ref.bib - : @article{1911.04817 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3687 of file ref.bib - : @article{2108.11510 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3703 of file ref.bib - : @article{2212.00253 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3721 of file ref.bib - : @article{1709.05067 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3737 of file ref.bib - : @article{1708.05866 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3753 of file ref.bib - : @article{1906.10025 - : , -I'm skipping whatever remains of this entry -Repeated entry---line 3769 of file ref.bib - : @article{2111.01334 - : , -I'm skipping whatever remains of this entry -Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath" for entry 1708.05866 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath" for entry 1708.05866 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad" for entry 2106.14642 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad" for entry 2106.14642 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier" for entry 2008.10426 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier" for entry 2008.10426 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2701 of file iclr2022_conference.bst -Too many commas in name 1 of "Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath" for entry 1708.05866 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath" for entry 1708.05866 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad" for entry 2106.14642 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad" for entry 2106.14642 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier" for entry 2008.10426 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier" for entry 2008.10426 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2865 of file iclr2022_conference.bst -Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334 -while executing---line 2865 of file iclr2022_conference.bst -You've used 18 entries, - 2773 wiz_defined-function locations, - 692 strings with 8040 characters, -and the built_in function-call counts, 5649 in all, are: -= -- 522 -> -- 180 -< -- 18 -+ -- 72 -- -- 54 -* -- 308 -:= -- 946 -add.period$ -- 72 -call.type$ -- 18 -change.case$ -- 72 -chr.to.int$ -- 18 -cite$ -- 36 -duplicate$ -- 342 -empty$ -- 541 -format.name$ -- 72 -if$ -- 1171 -int.to.chr$ -- 1 -int.to.str$ -- 1 -missing$ -- 18 -newline$ -- 116 -num.names$ -- 72 -pop$ -- 144 -preamble$ -- 1 -purify$ -- 54 -quote$ -- 0 -skip$ -- 235 -stack$ -- 0 -substring$ -- 36 -swap$ -- 18 -text.length$ -- 0 -text.prefix$ -- 0 -top$ -- 0 -type$ -- 198 -warning$ -- 0 -while$ -- 54 -width$ -- 0 -write$ -- 259 -(There were 248 error messages) diff --git a/outputs/outputs_20230421_000752/main.log b/outputs/outputs_20230421_000752/main.log deleted file mode 100644 index 7105fe576a00a00761a10aaccd09272e5c32bc31..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/main.log +++ /dev/null @@ -1,460 +0,0 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/W32TeX) (preloaded format=pdflatex 2020.3.10) 21 APR 2023 00:35 -entering extended mode - restricted \write18 enabled. - %&-line parsing enabled. -**main.tex -(./main.tex -LaTeX2e <2020-02-02> patch level 5 -L3 programming layer <2020-02-25> -(c:/texlive/2019/texmf-dist/tex/latex/base/article.cls -Document Class: article 2019/12/20 v1.4l Standard LaTeX document class -(c:/texlive/2019/texmf-dist/tex/latex/base/size10.clo -File: size10.clo 2019/12/20 v1.4l Standard LaTeX file (size option) -) -\c@part=\count167 -\c@section=\count168 -\c@subsection=\count169 -\c@subsubsection=\count170 -\c@paragraph=\count171 -\c@subparagraph=\count172 -\c@figure=\count173 -\c@table=\count174 -\abovecaptionskip=\skip47 -\belowcaptionskip=\skip48 -\bibindent=\dimen134 -) -(c:/texlive/2019/texmf-dist/tex/latex/graphics/graphicx.sty -Package: graphicx 2019/11/30 v1.2a Enhanced LaTeX Graphics (DPC,SPQR) - -(c:/texlive/2019/texmf-dist/tex/latex/graphics/keyval.sty -Package: keyval 2014/10/28 v1.15 key=value parser (DPC) -\KV@toks@=\toks15 -) -(c:/texlive/2019/texmf-dist/tex/latex/graphics/graphics.sty -Package: graphics 2019/11/30 v1.4a Standard LaTeX Graphics (DPC,SPQR) - -(c:/texlive/2019/texmf-dist/tex/latex/graphics/trig.sty -Package: trig 2016/01/03 v1.10 sin cos tan (DPC) -) -(c:/texlive/2019/texmf-dist/tex/latex/graphics-cfg/graphics.cfg -File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration -) -Package graphics Info: Driver file: pdftex.def on input line 105. - -(c:/texlive/2019/texmf-dist/tex/latex/graphics-def/pdftex.def -File: pdftex.def 2018/01/08 v1.0l Graphics/color driver for pdftex -)) -\Gin@req@height=\dimen135 -\Gin@req@width=\dimen136 -) -(c:/texlive/2019/texmf-dist/tex/latex/booktabs/booktabs.sty -Package: booktabs 2020/01/12 v1.61803398 Publication quality tables -\heavyrulewidth=\dimen137 -\lightrulewidth=\dimen138 -\cmidrulewidth=\dimen139 -\belowrulesep=\dimen140 -\belowbottomsep=\dimen141 -\aboverulesep=\dimen142 -\abovetopsep=\dimen143 -\cmidrulesep=\dimen144 -\cmidrulekern=\dimen145 -\defaultaddspace=\dimen146 -\@cmidla=\count175 -\@cmidlb=\count176 -\@aboverulesep=\dimen147 -\@belowrulesep=\dimen148 -\@thisruleclass=\count177 -\@lastruleclass=\count178 -\@thisrulewidth=\dimen149 -) -(./iclr2022_conference.sty -(c:/texlive/2019/texmf-dist/tex/latex/eso-pic/eso-pic.sty -Package: eso-pic 2018/04/12 v2.0h eso-pic (RN) - -(c:/texlive/2019/texmf-dist/tex/generic/atbegshi/atbegshi.sty -Package: atbegshi 2019/12/05 v1.19 At begin shipout hook (HO) - -(c:/texlive/2019/texmf-dist/tex/generic/infwarerr/infwarerr.sty -Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO) -) -(c:/texlive/2019/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty -Package: ltxcmds 2019/12/15 v1.24 LaTeX kernel commands for general use (HO) -) -(c:/texlive/2019/texmf-dist/tex/generic/iftex/iftex.sty -Package: iftex 2019/11/07 v1.0c TeX engine tests -)) -(c:/texlive/2019/texmf-dist/tex/latex/xcolor/xcolor.sty -Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK) - -(c:/texlive/2019/texmf-dist/tex/latex/graphics-cfg/color.cfg -File: color.cfg 2016/01/02 v1.6 sample color configuration -) -Package xcolor Info: Driver file: pdftex.def on input line 225. -Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348. -Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1352. -Package xcolor Info: Model `RGB' extended on input line 1364. -Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366. -Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367. -Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368. -Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369. -Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370. -Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371. -)) (./fancyhdr.sty -\fancy@headwidth=\skip49 -\f@ncyO@elh=\skip50 -\f@ncyO@erh=\skip51 -\f@ncyO@olh=\skip52 -\f@ncyO@orh=\skip53 -\f@ncyO@elf=\skip54 -\f@ncyO@erf=\skip55 -\f@ncyO@olf=\skip56 -\f@ncyO@orf=\skip57 -) (./natbib.sty -Package: natbib 2009/07/16 8.31 (PWD, AO) -\bibhang=\skip58 -\bibsep=\skip59 -LaTeX Info: Redefining \cite on input line 694. -\c@NAT@ctr=\count179 -)) (c:/texlive/2019/texmf-dist/tex/latex/psnfss/times.sty -Package: times 2005/04/12 PSNFSS-v9.2a (SPQR) -) -(./math_commands.tex (c:/texlive/2019/texmf-dist/tex/latex/amsmath/amsmath.sty -Package: amsmath 2020/01/20 v2.17e AMS math features -\@mathmargin=\skip60 - -For additional information on amsmath, use the `?' option. -(c:/texlive/2019/texmf-dist/tex/latex/amsmath/amstext.sty -Package: amstext 2000/06/29 v2.01 AMS text - -(c:/texlive/2019/texmf-dist/tex/latex/amsmath/amsgen.sty -File: amsgen.sty 1999/11/30 v2.0 generic functions -\@emptytoks=\toks16 -\ex@=\dimen150 -)) -(c:/texlive/2019/texmf-dist/tex/latex/amsmath/amsbsy.sty -Package: amsbsy 1999/11/29 v1.2d Bold Symbols -\pmbraise@=\dimen151 -) -(c:/texlive/2019/texmf-dist/tex/latex/amsmath/amsopn.sty -Package: amsopn 2016/03/08 v2.02 operator names -) -\inf@bad=\count180 -LaTeX Info: Redefining \frac on input line 227. -\uproot@=\count181 -\leftroot@=\count182 -LaTeX Info: Redefining \overline on input line 389. -\classnum@=\count183 -\DOTSCASE@=\count184 -LaTeX Info: Redefining \ldots on input line 486. -LaTeX Info: Redefining \dots on input line 489. -LaTeX Info: Redefining \cdots on input line 610. -\Mathstrutbox@=\box45 -\strutbox@=\box46 -\big@size=\dimen152 -LaTeX Font Info: Redeclaring font encoding OML on input line 733. -LaTeX Font Info: Redeclaring font encoding OMS on input line 734. -\macc@depth=\count185 -\c@MaxMatrixCols=\count186 -\dotsspace@=\muskip16 -\c@parentequation=\count187 -\dspbrk@lvl=\count188 -\tag@help=\toks17 -\row@=\count189 -\column@=\count190 -\maxfields@=\count191 -\andhelp@=\toks18 -\eqnshift@=\dimen153 -\alignsep@=\dimen154 -\tagshift@=\dimen155 -\tagwidth@=\dimen156 -\totwidth@=\dimen157 -\lineht@=\dimen158 -\@envbody=\toks19 -\multlinegap=\skip61 -\multlinetaggap=\skip62 -\mathdisplay@stack=\toks20 -LaTeX Info: Redefining \[ on input line 2859. -LaTeX Info: Redefining \] on input line 2860. -) -(c:/texlive/2019/texmf-dist/tex/latex/amsfonts/amsfonts.sty -Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support -\symAMSa=\mathgroup4 -\symAMSb=\mathgroup5 -LaTeX Font Info: Redeclaring math symbol \hbar on input line 98. -LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold' -(Font) U/euf/m/n --> U/euf/b/n on input line 106. -) -(c:/texlive/2019/texmf-dist/tex/latex/tools/bm.sty -Package: bm 2019/07/24 v1.2d Bold Symbol Support (DPC/FMi) -\symboldoperators=\mathgroup6 -\symboldletters=\mathgroup7 -\symboldsymbols=\mathgroup8 -LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 141. -LaTeX Info: Redefining \bm on input line 209. -) -LaTeX Font Info: Overwriting math alphabet `\mathsfit' in version `bold' -(Font) OT1/phv/m/sl --> OT1/phv/bx/n on input line 314. -) -(c:/texlive/2019/texmf-dist/tex/latex/hyperref/hyperref.sty -Package: hyperref 2020/01/14 v7.00d Hypertext links for LaTeX - -(c:/texlive/2019/texmf-dist/tex/latex/pdftexcmds/pdftexcmds.sty -Package: pdftexcmds 2019/11/24 v0.31 Utility functions of pdfTeX for LuaTeX (HO -) -Package pdftexcmds Info: \pdf@primitive is available. -Package pdftexcmds Info: \pdf@ifprimitive is available. -Package pdftexcmds Info: \pdfdraftmode found. -) -(c:/texlive/2019/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty -Package: kvsetkeys 2019/12/15 v1.18 Key value parser (HO) -) -(c:/texlive/2019/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty -Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO) -) -(c:/texlive/2019/texmf-dist/tex/generic/pdfescape/pdfescape.sty -Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO) -) -(c:/texlive/2019/texmf-dist/tex/latex/hycolor/hycolor.sty -Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO) -) -(c:/texlive/2019/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty -Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO) -) -(c:/texlive/2019/texmf-dist/tex/latex/auxhook/auxhook.sty -Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO) -) -(c:/texlive/2019/texmf-dist/tex/latex/kvoptions/kvoptions.sty -Package: kvoptions 2019/11/29 v3.13 Key value format for package options (HO) -) -\@linkdim=\dimen159 -\Hy@linkcounter=\count192 -\Hy@pagecounter=\count193 - -(c:/texlive/2019/texmf-dist/tex/latex/hyperref/pd1enc.def -File: pd1enc.def 2020/01/14 v7.00d Hyperref: PDFDocEncoding definition (HO) -) -(c:/texlive/2019/texmf-dist/tex/generic/intcalc/intcalc.sty -Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO) -) -(c:/texlive/2019/texmf-dist/tex/generic/etexcmds/etexcmds.sty -Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO) -) -\Hy@SavedSpaceFactor=\count194 -\pdfmajorversion=\count195 -Package hyperref Info: Hyper figures OFF on input line 4547. -Package hyperref Info: Link nesting OFF on input line 4552. -Package hyperref Info: Hyper index ON on input line 4555. -Package hyperref Info: Plain pages OFF on input line 4562. -Package hyperref Info: Backreferencing OFF on input line 4567. -Package hyperref Info: Implicit mode ON; LaTeX internals redefined. -Package hyperref Info: Bookmarks ON on input line 4800. -\c@Hy@tempcnt=\count196 - -(c:/texlive/2019/texmf-dist/tex/latex/url/url.sty -\Urlmuskip=\muskip17 -Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. -) -LaTeX Info: Redefining \url on input line 5159. -\XeTeXLinkMargin=\dimen160 - -(c:/texlive/2019/texmf-dist/tex/generic/bitset/bitset.sty -Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO) - -(c:/texlive/2019/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty -Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO -) -)) -\Fld@menulength=\count197 -\Field@Width=\dimen161 -\Fld@charsize=\dimen162 -Package hyperref Info: Hyper figures OFF on input line 6430. -Package hyperref Info: Link nesting OFF on input line 6435. -Package hyperref Info: Hyper index ON on input line 6438. -Package hyperref Info: backreferencing OFF on input line 6445. -Package hyperref Info: Link coloring OFF on input line 6450. -Package hyperref Info: Link coloring with OCG OFF on input line 6455. -Package hyperref Info: PDF/A mode OFF on input line 6460. -LaTeX Info: Redefining \ref on input line 6500. -LaTeX Info: Redefining \pageref on input line 6504. -\Hy@abspage=\count198 -\c@Item=\count199 -\c@Hfootnote=\count266 -) -Package hyperref Info: Driver (autodetected): hpdftex. - -(c:/texlive/2019/texmf-dist/tex/latex/hyperref/hpdftex.def -File: hpdftex.def 2020/01/14 v7.00d Hyperref driver for pdfTeX - -(c:/texlive/2019/texmf-dist/tex/latex/atveryend/atveryend.sty -Package: atveryend 2019-12-11 v1.11 Hooks at the very end of document (HO) -Package atveryend Info: \enddocument detected (standard20110627). -) -\Fld@listcount=\count267 -\c@bookmark@seq@number=\count268 - -(c:/texlive/2019/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty -Package: rerunfilecheck 2019/12/05 v1.9 Rerun checks for auxiliary files (HO) - -(c:/texlive/2019/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty -Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO) -) -Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2 -86. -) -\Hy@SectionHShift=\skip63 -) -(c:/texlive/2019/texmf-dist/tex/latex/algorithmicx/algorithmicx.sty -Package: algorithmicx 2005/04/27 v1.2 Algorithmicx - -(c:/texlive/2019/texmf-dist/tex/latex/base/ifthen.sty -Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC) -) -Document Style algorithmicx 1.2 - a greatly improved `algorithmic' style -\c@ALG@line=\count269 -\c@ALG@rem=\count270 -\c@ALG@nested=\count271 -\ALG@tlm=\skip64 -\ALG@thistlm=\skip65 -\c@ALG@Lnr=\count272 -\c@ALG@blocknr=\count273 -\c@ALG@storecount=\count274 -\c@ALG@tmpcounter=\count275 -\ALG@tmplength=\skip66 -) (c:/texlive/2019/texmf-dist/tex/latex/l3backend/l3backend-pdfmode.def -File: l3backend-pdfmode.def 2020-02-23 L3 backend support: PDF mode -\l__kernel_color_stack_int=\count276 -\l__pdf_internal_box=\box47 -) -(./main.aux) -\openout1 = `main.aux'. - -LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 17. -LaTeX Font Info: ... okay on input line 17. -LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 17. -LaTeX Font Info: ... okay on input line 17. -LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 17. -LaTeX Font Info: ... okay on input line 17. -LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 17. -LaTeX Font Info: ... okay on input line 17. -LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 17. -LaTeX Font Info: ... okay on input line 17. -LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 17. -LaTeX Font Info: ... okay on input line 17. -LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 17. -LaTeX Font Info: ... okay on input line 17. -LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 17. -LaTeX Font Info: ... okay on input line 17. -LaTeX Font Info: Trying to load font information for OT1+ptm on input line 1 -7. - (c:/texlive/2019/texmf-dist/tex/latex/psnfss/ot1ptm.fd -File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm. -) -(c:/texlive/2019/texmf-dist/tex/context/base/mkii/supp-pdf.mkii -[Loading MPS to PDF converter (version 2006.09.02).] -\scratchcounter=\count277 -\scratchdimen=\dimen163 -\scratchbox=\box48 -\nofMPsegments=\count278 -\nofMParguments=\count279 -\everyMPshowfont=\toks21 -\MPscratchCnt=\count280 -\MPscratchDim=\dimen164 -\MPnumerator=\count281 -\makeMPintoPDFobject=\count282 -\everyMPtoPDFconversion=\toks22 -) (c:/texlive/2019/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty -Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf -Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4 -85. - -(c:/texlive/2019/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg -File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv -e -)) -\AtBeginShipoutBox=\box49 -Package hyperref Info: Link coloring OFF on input line 17. - -(c:/texlive/2019/texmf-dist/tex/latex/hyperref/nameref.sty -Package: nameref 2019/09/16 v2.46 Cross-referencing by name of section - -(c:/texlive/2019/texmf-dist/tex/latex/refcount/refcount.sty -Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO) -) -(c:/texlive/2019/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty -Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO) -) -\c@section@level=\count283 -) -LaTeX Info: Redefining \ref on input line 17. -LaTeX Info: Redefining \pageref on input line 17. -LaTeX Info: Redefining \nameref on input line 17. - -(./main.out) (./main.out) -\@outlinefile=\write3 -\openout3 = `main.out'. - -LaTeX Font Info: Trying to load font information for U+msa on input line 19. - - -(c:/texlive/2019/texmf-dist/tex/latex/amsfonts/umsa.fd -File: umsa.fd 2013/01/14 v3.01 AMS symbols A -) -LaTeX Font Info: Trying to load font information for U+msb on input line 19. - - -(c:/texlive/2019/texmf-dist/tex/latex/amsfonts/umsb.fd -File: umsb.fd 2013/01/14 v3.01 AMS symbols B -) (./abstract.tex) -(./introduction.tex -Missing character: There is no in font ptmr7t! -Missing character: There is no in font ptmr7t! -) (./related works.tex [1{c:/texlive/2019/texmf-var/fonts/map/pdftex/updmap/pdf -tex.map} - -]) (./backgrounds.tex [2]) (./methodology.tex) -(./experiments.tex) (./conclusion.tex) (./main.bbl -LaTeX Font Info: Trying to load font information for OT1+pcr on input line 1 -3. - -(c:/texlive/2019/texmf-dist/tex/latex/psnfss/ot1pcr.fd -File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr. -) [3] -Missing character: There is no in font ptmr7t! -Missing character: There is no in font ptmr7t! -) -Package atveryend Info: Empty hook `BeforeClearDocument' on input line 34. - [4] -Package atveryend Info: Empty hook `AfterLastShipout' on input line 34. - (./main.aux) -Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 34. -Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 34. -Package rerunfilecheck Info: File `main.out' has not changed. -(rerunfilecheck) Checksum: 318D5997BC6EC56C0035AD484222C237;470. -Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 34. - ) -Here is how much of TeX's memory you used: - 7981 strings out of 480994 - 109943 string characters out of 5916032 - 390206 words of memory out of 5000000 - 23274 multiletter control sequences out of 15000+600000 - 551097 words of font info for 60 fonts, out of 8000000 for 9000 - 1141 hyphenation exceptions out of 8191 - 40i,11n,49p,949b,440s stack positions out of 5000i,500n,10000p,200000b,80000s -{c:/texlive/2019/texmf-dist/fonts/enc/dvips/base/8r.enc} - -Output written on main.pdf (4 pages, 135265 bytes). -PDF statistics: - 247 PDF objects out of 1000 (max. 8388607) - 226 compressed objects within 3 object streams - 37 named destinations out of 1000 (max. 500000) - 57 words of extra memory for PDF output out of 10000 (max. 10000000) - diff --git a/outputs/outputs_20230421_000752/main.out b/outputs/outputs_20230421_000752/main.out deleted file mode 100644 index bb5e26452a9c19f923b3f6553b23750ebee13af4..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/main.out +++ /dev/null @@ -1,7 +0,0 @@ -\BOOKMARK [1][-]{section.1}{introduction}{}% 1 -\BOOKMARK [1][-]{section.2}{related works}{}% 2 -\BOOKMARK [1][-]{section.3}{backgrounds}{}% 3 -\BOOKMARK [2][-]{subsection.3.1}{Problem Statement and Foundational Concepts}{section.3}% 4 -\BOOKMARK [2][-]{subsection.3.2}{Q-Learning and Related Algorithms}{section.3}% 5 -\BOOKMARK [2][-]{subsection.3.3}{Policy Gradient Methods}{section.3}% 6 -\BOOKMARK [2][-]{subsection.3.4}{Methodology and Evaluation Metrics}{section.3}% 7 diff --git a/outputs/outputs_20230421_000752/main.pdf b/outputs/outputs_20230421_000752/main.pdf deleted file mode 100644 index 41e04c7bda380200cf2256e5633a41f257c014f5..0000000000000000000000000000000000000000 Binary files a/outputs/outputs_20230421_000752/main.pdf and /dev/null differ diff --git a/outputs/outputs_20230421_000752/main.synctex.gz b/outputs/outputs_20230421_000752/main.synctex.gz deleted file mode 100644 index 025ac47dbf478457d3f84befd8a255758610216a..0000000000000000000000000000000000000000 Binary files a/outputs/outputs_20230421_000752/main.synctex.gz and /dev/null differ diff --git a/outputs/outputs_20230421_000752/main.tex b/outputs/outputs_20230421_000752/main.tex deleted file mode 100644 index bae75ed9072270fd16494577c1eabb916b70af6f..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/main.tex +++ /dev/null @@ -1,34 +0,0 @@ -\documentclass{article} % For LaTeX2e -\UseRawInputEncoding -\usepackage{graphicx} -\usepackage{booktabs} -\usepackage{iclr2022_conference, times} -\input{math_commands.tex} -\usepackage{hyperref} -\usepackage{url} -\usepackage{algorithmicx} - -\title{A Survey on Reinforcement Learning} -\author{GPT-4} - -\newcommand{\fix}{\marginpar{FIX}} -\newcommand{\new}{\marginpar{NEW}} - -\begin{document} -\maketitle -\input{abstract.tex} -\input{introduction.tex} -\input{related works.tex} -\input{backgrounds.tex} -\input{methodology.tex} -\input{experiments.tex} -\input{conclusion.tex} - -\bibliography{ref} -\bibliographystyle{iclr2022_conference} - -%\appendix -%\section{Appendix} -%You may include other additional sections here. - -\end{document} diff --git a/outputs/outputs_20230421_000752/math_commands.tex b/outputs/outputs_20230421_000752/math_commands.tex deleted file mode 100644 index 0668f931945175ca8535db25cc27fa603920cc3c..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/math_commands.tex +++ /dev/null @@ -1,508 +0,0 @@ -%%%%% NEW MATH DEFINITIONS %%%%% - -\usepackage{amsmath,amsfonts,bm} - -% Mark sections of captions for referring to divisions of figures -\newcommand{\figleft}{{\em (Left)}} -\newcommand{\figcenter}{{\em (Center)}} -\newcommand{\figright}{{\em (Right)}} -\newcommand{\figtop}{{\em (Top)}} -\newcommand{\figbottom}{{\em (Bottom)}} -\newcommand{\captiona}{{\em (a)}} -\newcommand{\captionb}{{\em (b)}} -\newcommand{\captionc}{{\em (c)}} -\newcommand{\captiond}{{\em (d)}} - -% Highlight a newly defined term -\newcommand{\newterm}[1]{{\bf #1}} - - -% Figure reference, lower-case. -\def\figref#1{figure~\ref{#1}} -% Figure reference, capital. For start of sentence -\def\Figref#1{Figure~\ref{#1}} -\def\twofigref#1#2{figures \ref{#1} and \ref{#2}} -\def\quadfigref#1#2#3#4{figures \ref{#1}, \ref{#2}, \ref{#3} and \ref{#4}} -% Section reference, lower-case. -\def\secref#1{section~\ref{#1}} -% Section reference, capital. -\def\Secref#1{Section~\ref{#1}} -% Reference to two sections. -\def\twosecrefs#1#2{sections \ref{#1} and \ref{#2}} -% Reference to three sections. -\def\secrefs#1#2#3{sections \ref{#1}, \ref{#2} and \ref{#3}} -% Reference to an equation, lower-case. -\def\eqref#1{equation~\ref{#1}} -% Reference to an equation, upper case -\def\Eqref#1{Equation~\ref{#1}} -% A raw reference to an equation---avoid using if possible -\def\plaineqref#1{\ref{#1}} -% Reference to a chapter, lower-case. -\def\chapref#1{chapter~\ref{#1}} -% Reference to an equation, upper case. -\def\Chapref#1{Chapter~\ref{#1}} -% Reference to a range of chapters -\def\rangechapref#1#2{chapters\ref{#1}--\ref{#2}} -% Reference to an algorithm, lower-case. -\def\algref#1{algorithm~\ref{#1}} -% Reference to an algorithm, upper case. -\def\Algref#1{Algorithm~\ref{#1}} -\def\twoalgref#1#2{algorithms \ref{#1} and \ref{#2}} -\def\Twoalgref#1#2{Algorithms \ref{#1} and \ref{#2}} -% Reference to a part, lower case -\def\partref#1{part~\ref{#1}} -% Reference to a part, upper case -\def\Partref#1{Part~\ref{#1}} -\def\twopartref#1#2{parts \ref{#1} and \ref{#2}} - -\def\ceil#1{\lceil #1 \rceil} -\def\floor#1{\lfloor #1 \rfloor} -\def\1{\bm{1}} -\newcommand{\train}{\mathcal{D}} -\newcommand{\valid}{\mathcal{D_{\mathrm{valid}}}} -\newcommand{\test}{\mathcal{D_{\mathrm{test}}}} - -\def\eps{{\epsilon}} - - -% Random variables -\def\reta{{\textnormal{$\eta$}}} -\def\ra{{\textnormal{a}}} -\def\rb{{\textnormal{b}}} -\def\rc{{\textnormal{c}}} -\def\rd{{\textnormal{d}}} -\def\re{{\textnormal{e}}} -\def\rf{{\textnormal{f}}} -\def\rg{{\textnormal{g}}} -\def\rh{{\textnormal{h}}} -\def\ri{{\textnormal{i}}} -\def\rj{{\textnormal{j}}} -\def\rk{{\textnormal{k}}} -\def\rl{{\textnormal{l}}} -% rm is already a command, just don't name any random variables m -\def\rn{{\textnormal{n}}} -\def\ro{{\textnormal{o}}} -\def\rp{{\textnormal{p}}} -\def\rq{{\textnormal{q}}} -\def\rr{{\textnormal{r}}} -\def\rs{{\textnormal{s}}} -\def\rt{{\textnormal{t}}} -\def\ru{{\textnormal{u}}} -\def\rv{{\textnormal{v}}} -\def\rw{{\textnormal{w}}} -\def\rx{{\textnormal{x}}} -\def\ry{{\textnormal{y}}} -\def\rz{{\textnormal{z}}} - -% Random vectors -\def\rvepsilon{{\mathbf{\epsilon}}} -\def\rvtheta{{\mathbf{\theta}}} -\def\rva{{\mathbf{a}}} -\def\rvb{{\mathbf{b}}} -\def\rvc{{\mathbf{c}}} -\def\rvd{{\mathbf{d}}} -\def\rve{{\mathbf{e}}} -\def\rvf{{\mathbf{f}}} -\def\rvg{{\mathbf{g}}} -\def\rvh{{\mathbf{h}}} -\def\rvu{{\mathbf{i}}} -\def\rvj{{\mathbf{j}}} -\def\rvk{{\mathbf{k}}} -\def\rvl{{\mathbf{l}}} -\def\rvm{{\mathbf{m}}} -\def\rvn{{\mathbf{n}}} -\def\rvo{{\mathbf{o}}} -\def\rvp{{\mathbf{p}}} -\def\rvq{{\mathbf{q}}} -\def\rvr{{\mathbf{r}}} -\def\rvs{{\mathbf{s}}} -\def\rvt{{\mathbf{t}}} -\def\rvu{{\mathbf{u}}} -\def\rvv{{\mathbf{v}}} -\def\rvw{{\mathbf{w}}} -\def\rvx{{\mathbf{x}}} -\def\rvy{{\mathbf{y}}} -\def\rvz{{\mathbf{z}}} - -% Elements of random vectors -\def\erva{{\textnormal{a}}} -\def\ervb{{\textnormal{b}}} -\def\ervc{{\textnormal{c}}} -\def\ervd{{\textnormal{d}}} -\def\erve{{\textnormal{e}}} -\def\ervf{{\textnormal{f}}} -\def\ervg{{\textnormal{g}}} -\def\ervh{{\textnormal{h}}} -\def\ervi{{\textnormal{i}}} -\def\ervj{{\textnormal{j}}} -\def\ervk{{\textnormal{k}}} -\def\ervl{{\textnormal{l}}} -\def\ervm{{\textnormal{m}}} -\def\ervn{{\textnormal{n}}} -\def\ervo{{\textnormal{o}}} -\def\ervp{{\textnormal{p}}} -\def\ervq{{\textnormal{q}}} -\def\ervr{{\textnormal{r}}} -\def\ervs{{\textnormal{s}}} -\def\ervt{{\textnormal{t}}} -\def\ervu{{\textnormal{u}}} -\def\ervv{{\textnormal{v}}} -\def\ervw{{\textnormal{w}}} -\def\ervx{{\textnormal{x}}} -\def\ervy{{\textnormal{y}}} -\def\ervz{{\textnormal{z}}} - -% Random matrices -\def\rmA{{\mathbf{A}}} -\def\rmB{{\mathbf{B}}} -\def\rmC{{\mathbf{C}}} -\def\rmD{{\mathbf{D}}} -\def\rmE{{\mathbf{E}}} -\def\rmF{{\mathbf{F}}} -\def\rmG{{\mathbf{G}}} -\def\rmH{{\mathbf{H}}} -\def\rmI{{\mathbf{I}}} -\def\rmJ{{\mathbf{J}}} -\def\rmK{{\mathbf{K}}} -\def\rmL{{\mathbf{L}}} -\def\rmM{{\mathbf{M}}} -\def\rmN{{\mathbf{N}}} -\def\rmO{{\mathbf{O}}} -\def\rmP{{\mathbf{P}}} -\def\rmQ{{\mathbf{Q}}} -\def\rmR{{\mathbf{R}}} -\def\rmS{{\mathbf{S}}} -\def\rmT{{\mathbf{T}}} -\def\rmU{{\mathbf{U}}} -\def\rmV{{\mathbf{V}}} -\def\rmW{{\mathbf{W}}} -\def\rmX{{\mathbf{X}}} -\def\rmY{{\mathbf{Y}}} -\def\rmZ{{\mathbf{Z}}} - -% Elements of random matrices -\def\ermA{{\textnormal{A}}} -\def\ermB{{\textnormal{B}}} -\def\ermC{{\textnormal{C}}} -\def\ermD{{\textnormal{D}}} -\def\ermE{{\textnormal{E}}} -\def\ermF{{\textnormal{F}}} -\def\ermG{{\textnormal{G}}} -\def\ermH{{\textnormal{H}}} -\def\ermI{{\textnormal{I}}} -\def\ermJ{{\textnormal{J}}} -\def\ermK{{\textnormal{K}}} -\def\ermL{{\textnormal{L}}} -\def\ermM{{\textnormal{M}}} -\def\ermN{{\textnormal{N}}} -\def\ermO{{\textnormal{O}}} -\def\ermP{{\textnormal{P}}} -\def\ermQ{{\textnormal{Q}}} -\def\ermR{{\textnormal{R}}} -\def\ermS{{\textnormal{S}}} -\def\ermT{{\textnormal{T}}} -\def\ermU{{\textnormal{U}}} -\def\ermV{{\textnormal{V}}} -\def\ermW{{\textnormal{W}}} -\def\ermX{{\textnormal{X}}} -\def\ermY{{\textnormal{Y}}} -\def\ermZ{{\textnormal{Z}}} - -% Vectors -\def\vzero{{\bm{0}}} -\def\vone{{\bm{1}}} -\def\vmu{{\bm{\mu}}} -\def\vtheta{{\bm{\theta}}} -\def\va{{\bm{a}}} -\def\vb{{\bm{b}}} -\def\vc{{\bm{c}}} -\def\vd{{\bm{d}}} -\def\ve{{\bm{e}}} -\def\vf{{\bm{f}}} -\def\vg{{\bm{g}}} -\def\vh{{\bm{h}}} -\def\vi{{\bm{i}}} -\def\vj{{\bm{j}}} -\def\vk{{\bm{k}}} -\def\vl{{\bm{l}}} -\def\vm{{\bm{m}}} -\def\vn{{\bm{n}}} -\def\vo{{\bm{o}}} -\def\vp{{\bm{p}}} -\def\vq{{\bm{q}}} -\def\vr{{\bm{r}}} -\def\vs{{\bm{s}}} -\def\vt{{\bm{t}}} -\def\vu{{\bm{u}}} -\def\vv{{\bm{v}}} -\def\vw{{\bm{w}}} -\def\vx{{\bm{x}}} -\def\vy{{\bm{y}}} -\def\vz{{\bm{z}}} - -% Elements of vectors -\def\evalpha{{\alpha}} -\def\evbeta{{\beta}} -\def\evepsilon{{\epsilon}} -\def\evlambda{{\lambda}} -\def\evomega{{\omega}} -\def\evmu{{\mu}} -\def\evpsi{{\psi}} -\def\evsigma{{\sigma}} -\def\evtheta{{\theta}} -\def\eva{{a}} -\def\evb{{b}} -\def\evc{{c}} -\def\evd{{d}} -\def\eve{{e}} -\def\evf{{f}} -\def\evg{{g}} -\def\evh{{h}} -\def\evi{{i}} -\def\evj{{j}} -\def\evk{{k}} -\def\evl{{l}} -\def\evm{{m}} -\def\evn{{n}} -\def\evo{{o}} -\def\evp{{p}} -\def\evq{{q}} -\def\evr{{r}} -\def\evs{{s}} -\def\evt{{t}} -\def\evu{{u}} -\def\evv{{v}} -\def\evw{{w}} -\def\evx{{x}} -\def\evy{{y}} -\def\evz{{z}} - -% Matrix -\def\mA{{\bm{A}}} -\def\mB{{\bm{B}}} -\def\mC{{\bm{C}}} -\def\mD{{\bm{D}}} -\def\mE{{\bm{E}}} -\def\mF{{\bm{F}}} -\def\mG{{\bm{G}}} -\def\mH{{\bm{H}}} -\def\mI{{\bm{I}}} -\def\mJ{{\bm{J}}} -\def\mK{{\bm{K}}} -\def\mL{{\bm{L}}} -\def\mM{{\bm{M}}} -\def\mN{{\bm{N}}} -\def\mO{{\bm{O}}} -\def\mP{{\bm{P}}} -\def\mQ{{\bm{Q}}} -\def\mR{{\bm{R}}} -\def\mS{{\bm{S}}} -\def\mT{{\bm{T}}} -\def\mU{{\bm{U}}} -\def\mV{{\bm{V}}} -\def\mW{{\bm{W}}} -\def\mX{{\bm{X}}} -\def\mY{{\bm{Y}}} -\def\mZ{{\bm{Z}}} -\def\mBeta{{\bm{\beta}}} -\def\mPhi{{\bm{\Phi}}} -\def\mLambda{{\bm{\Lambda}}} -\def\mSigma{{\bm{\Sigma}}} - -% Tensor -\DeclareMathAlphabet{\mathsfit}{\encodingdefault}{\sfdefault}{m}{sl} -\SetMathAlphabet{\mathsfit}{bold}{\encodingdefault}{\sfdefault}{bx}{n} -\newcommand{\tens}[1]{\bm{\mathsfit{#1}}} -\def\tA{{\tens{A}}} -\def\tB{{\tens{B}}} -\def\tC{{\tens{C}}} -\def\tD{{\tens{D}}} -\def\tE{{\tens{E}}} -\def\tF{{\tens{F}}} -\def\tG{{\tens{G}}} -\def\tH{{\tens{H}}} -\def\tI{{\tens{I}}} -\def\tJ{{\tens{J}}} -\def\tK{{\tens{K}}} -\def\tL{{\tens{L}}} -\def\tM{{\tens{M}}} -\def\tN{{\tens{N}}} -\def\tO{{\tens{O}}} -\def\tP{{\tens{P}}} -\def\tQ{{\tens{Q}}} -\def\tR{{\tens{R}}} -\def\tS{{\tens{S}}} -\def\tT{{\tens{T}}} -\def\tU{{\tens{U}}} -\def\tV{{\tens{V}}} -\def\tW{{\tens{W}}} -\def\tX{{\tens{X}}} -\def\tY{{\tens{Y}}} -\def\tZ{{\tens{Z}}} - - -% Graph -\def\gA{{\mathcal{A}}} -\def\gB{{\mathcal{B}}} -\def\gC{{\mathcal{C}}} -\def\gD{{\mathcal{D}}} -\def\gE{{\mathcal{E}}} -\def\gF{{\mathcal{F}}} -\def\gG{{\mathcal{G}}} -\def\gH{{\mathcal{H}}} -\def\gI{{\mathcal{I}}} -\def\gJ{{\mathcal{J}}} -\def\gK{{\mathcal{K}}} -\def\gL{{\mathcal{L}}} -\def\gM{{\mathcal{M}}} -\def\gN{{\mathcal{N}}} -\def\gO{{\mathcal{O}}} -\def\gP{{\mathcal{P}}} -\def\gQ{{\mathcal{Q}}} -\def\gR{{\mathcal{R}}} -\def\gS{{\mathcal{S}}} -\def\gT{{\mathcal{T}}} -\def\gU{{\mathcal{U}}} -\def\gV{{\mathcal{V}}} -\def\gW{{\mathcal{W}}} -\def\gX{{\mathcal{X}}} -\def\gY{{\mathcal{Y}}} -\def\gZ{{\mathcal{Z}}} - -% Sets -\def\sA{{\mathbb{A}}} -\def\sB{{\mathbb{B}}} -\def\sC{{\mathbb{C}}} -\def\sD{{\mathbb{D}}} -% Don't use a set called E, because this would be the same as our symbol -% for expectation. -\def\sF{{\mathbb{F}}} -\def\sG{{\mathbb{G}}} -\def\sH{{\mathbb{H}}} -\def\sI{{\mathbb{I}}} -\def\sJ{{\mathbb{J}}} -\def\sK{{\mathbb{K}}} -\def\sL{{\mathbb{L}}} -\def\sM{{\mathbb{M}}} -\def\sN{{\mathbb{N}}} -\def\sO{{\mathbb{O}}} -\def\sP{{\mathbb{P}}} -\def\sQ{{\mathbb{Q}}} -\def\sR{{\mathbb{R}}} -\def\sS{{\mathbb{S}}} -\def\sT{{\mathbb{T}}} -\def\sU{{\mathbb{U}}} -\def\sV{{\mathbb{V}}} -\def\sW{{\mathbb{W}}} -\def\sX{{\mathbb{X}}} -\def\sY{{\mathbb{Y}}} -\def\sZ{{\mathbb{Z}}} - -% Entries of a matrix -\def\emLambda{{\Lambda}} -\def\emA{{A}} -\def\emB{{B}} -\def\emC{{C}} -\def\emD{{D}} -\def\emE{{E}} -\def\emF{{F}} -\def\emG{{G}} -\def\emH{{H}} -\def\emI{{I}} -\def\emJ{{J}} -\def\emK{{K}} -\def\emL{{L}} -\def\emM{{M}} -\def\emN{{N}} -\def\emO{{O}} -\def\emP{{P}} -\def\emQ{{Q}} -\def\emR{{R}} -\def\emS{{S}} -\def\emT{{T}} -\def\emU{{U}} -\def\emV{{V}} -\def\emW{{W}} -\def\emX{{X}} -\def\emY{{Y}} -\def\emZ{{Z}} -\def\emSigma{{\Sigma}} - -% entries of a tensor -% Same font as tensor, without \bm wrapper -\newcommand{\etens}[1]{\mathsfit{#1}} -\def\etLambda{{\etens{\Lambda}}} -\def\etA{{\etens{A}}} -\def\etB{{\etens{B}}} -\def\etC{{\etens{C}}} -\def\etD{{\etens{D}}} -\def\etE{{\etens{E}}} -\def\etF{{\etens{F}}} -\def\etG{{\etens{G}}} -\def\etH{{\etens{H}}} -\def\etI{{\etens{I}}} -\def\etJ{{\etens{J}}} -\def\etK{{\etens{K}}} -\def\etL{{\etens{L}}} -\def\etM{{\etens{M}}} -\def\etN{{\etens{N}}} -\def\etO{{\etens{O}}} -\def\etP{{\etens{P}}} -\def\etQ{{\etens{Q}}} -\def\etR{{\etens{R}}} -\def\etS{{\etens{S}}} -\def\etT{{\etens{T}}} -\def\etU{{\etens{U}}} -\def\etV{{\etens{V}}} -\def\etW{{\etens{W}}} -\def\etX{{\etens{X}}} -\def\etY{{\etens{Y}}} -\def\etZ{{\etens{Z}}} - -% The true underlying data generating distribution -\newcommand{\pdata}{p_{\rm{data}}} -% The empirical distribution defined by the training set -\newcommand{\ptrain}{\hat{p}_{\rm{data}}} -\newcommand{\Ptrain}{\hat{P}_{\rm{data}}} -% The model distribution -\newcommand{\pmodel}{p_{\rm{model}}} -\newcommand{\Pmodel}{P_{\rm{model}}} -\newcommand{\ptildemodel}{\tilde{p}_{\rm{model}}} -% Stochastic autoencoder distributions -\newcommand{\pencode}{p_{\rm{encoder}}} -\newcommand{\pdecode}{p_{\rm{decoder}}} -\newcommand{\precons}{p_{\rm{reconstruct}}} - -\newcommand{\laplace}{\mathrm{Laplace}} % Laplace distribution - -\newcommand{\E}{\mathbb{E}} -\newcommand{\Ls}{\mathcal{L}} -\newcommand{\R}{\mathbb{R}} -\newcommand{\emp}{\tilde{p}} -\newcommand{\lr}{\alpha} -\newcommand{\reg}{\lambda} -\newcommand{\rect}{\mathrm{rectifier}} -\newcommand{\softmax}{\mathrm{softmax}} -\newcommand{\sigmoid}{\sigma} -\newcommand{\softplus}{\zeta} -\newcommand{\KL}{D_{\mathrm{KL}}} -\newcommand{\Var}{\mathrm{Var}} -\newcommand{\standarderror}{\mathrm{SE}} -\newcommand{\Cov}{\mathrm{Cov}} -% Wolfram Mathworld says $L^2$ is for function spaces and $\ell^2$ is for vectors -% But then they seem to use $L^2$ for vectors throughout the site, and so does -% wikipedia. -\newcommand{\normlzero}{L^0} -\newcommand{\normlone}{L^1} -\newcommand{\normltwo}{L^2} -\newcommand{\normlp}{L^p} -\newcommand{\normmax}{L^\infty} - -\newcommand{\parents}{Pa} % See usage in notation.tex. Chosen to match Daphne's book. - -\DeclareMathOperator*{\argmax}{arg\,max} -\DeclareMathOperator*{\argmin}{arg\,min} - -\DeclareMathOperator{\sign}{sign} -\DeclareMathOperator{\Tr}{Tr} -\let\ab\allowbreak diff --git a/outputs/outputs_20230421_000752/methodology.tex b/outputs/outputs_20230421_000752/methodology.tex deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/outputs/outputs_20230421_000752/natbib.sty b/outputs/outputs_20230421_000752/natbib.sty deleted file mode 100644 index ff0d0b91b6ef41468c593a0ca40a81f9a183b055..0000000000000000000000000000000000000000 --- a/outputs/outputs_20230421_000752/natbib.sty +++ /dev/null @@ -1,1246 +0,0 @@ -%% -%% This is file `natbib.sty', -%% generated with the docstrip utility. -%% -%% The original source files were: -%% -%% natbib.dtx (with options: `package,all') -%% ============================================= -%% IMPORTANT NOTICE: -%% -%% This program can be redistributed and/or modified under the terms -%% of the LaTeX Project Public License Distributed from CTAN -%% archives in directory macros/latex/base/lppl.txt; either -%% version 1 of the License, or any later version. -%% -%% This is a generated file. -%% It may not be distributed without the original source file natbib.dtx. -%% -%% Full documentation can be obtained by LaTeXing that original file. -%% Only a few abbreviated comments remain here to describe the usage. -%% ============================================= -%% Copyright 1993-2009 Patrick W Daly -%% Max-Planck-Institut f\"ur Sonnensystemforschung -%% Max-Planck-Str. 2 -%% D-37191 Katlenburg-Lindau -%% Germany -%% E-mail: daly@mps.mpg.de -\NeedsTeXFormat{LaTeX2e}[1995/06/01] -\ProvidesPackage{natbib} - [2009/07/16 8.31 (PWD, AO)] - - % This package reimplements the LaTeX \cite command to be used for various - % citation styles, both author-year and numerical. It accepts BibTeX - % output intended for many other packages, and therefore acts as a - % general, all-purpose citation-style interface. - % - % With standard numerical .bst files, only numerical citations are - % possible. With an author-year .bst file, both numerical and - % author-year citations are possible. - % - % If author-year citations are selected, \bibitem must have one of the - % following forms: - % \bibitem[Jones et al.(1990)]{key}... - % \bibitem[Jones et al.(1990)Jones, Baker, and Williams]{key}... - % \bibitem[Jones et al., 1990]{key}... - % \bibitem[\protect\citeauthoryear{Jones, Baker, and Williams}{Jones - % et al.}{1990}]{key}... - % \bibitem[\protect\citeauthoryear{Jones et al.}{1990}]{key}... - % \bibitem[\protect\astroncite{Jones et al.}{1990}]{key}... - % \bibitem[\protect\citename{Jones et al., }1990]{key}... - % \harvarditem[Jones et al.]{Jones, Baker, and Williams}{1990}{key}... - % - % This is either to be made up manually, or to be generated by an - % appropriate .bst file with BibTeX. - % Author-year mode || Numerical mode - % Then, \citet{key} ==>> Jones et al. (1990) || Jones et al. [21] - % \citep{key} ==>> (Jones et al., 1990) || [21] - % Multiple citations as normal: - % \citep{key1,key2} ==>> (Jones et al., 1990; Smith, 1989) || [21,24] - % or (Jones et al., 1990, 1991) || [21,24] - % or (Jones et al., 1990a,b) || [21,24] - % \cite{key} is the equivalent of \citet{key} in author-year mode - % and of \citep{key} in numerical mode - % Full author lists may be forced with \citet* or \citep*, e.g. - % \citep*{key} ==>> (Jones, Baker, and Williams, 1990) - % Optional notes as: - % \citep[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2) - % \citep[e.g.,][]{key} ==>> (e.g., Jones et al., 1990) - % \citep[see][pg. 34]{key}==>> (see Jones et al., 1990, pg. 34) - % (Note: in standard LaTeX, only one note is allowed, after the ref. - % Here, one note is like the standard, two make pre- and post-notes.) - % \citealt{key} ==>> Jones et al. 1990 - % \citealt*{key} ==>> Jones, Baker, and Williams 1990 - % \citealp{key} ==>> Jones et al., 1990 - % \citealp*{key} ==>> Jones, Baker, and Williams, 1990 - % Additional citation possibilities (both author-year and numerical modes) - % \citeauthor{key} ==>> Jones et al. - % \citeauthor*{key} ==>> Jones, Baker, and Williams - % \citeyear{key} ==>> 1990 - % \citeyearpar{key} ==>> (1990) - % \citetext{priv. comm.} ==>> (priv. comm.) - % \citenum{key} ==>> 11 [non-superscripted] - % Note: full author lists depends on whether the bib style supports them; - % if not, the abbreviated list is printed even when full requested. - % - % For names like della Robbia at the start of a sentence, use - % \Citet{dRob98} ==>> Della Robbia (1998) - % \Citep{dRob98} ==>> (Della Robbia, 1998) - % \Citeauthor{dRob98} ==>> Della Robbia - % - % - % Citation aliasing is achieved with - % \defcitealias{key}{text} - % \citetalias{key} ==>> text - % \citepalias{key} ==>> (text) - % - % Defining the citation mode and punctual (citation style) - % \setcitestyle{} - % Example: \setcitestyle{square,semicolon} - % Alternatively: - % Use \bibpunct with 6 mandatory arguments: - % 1. opening bracket for citation - % 2. closing bracket - % 3. citation separator (for multiple citations in one \cite) - % 4. the letter n for numerical styles, s for superscripts - % else anything for author-year - % 5. punctuation between authors and date - % 6. punctuation between years (or numbers) when common authors missing - % One optional argument is the character coming before post-notes. It - % appears in square braces before all other arguments. May be left off. - % Example (and default) \bibpunct[, ]{(}{)}{;}{a}{,}{,} - % - % To make this automatic for a given bib style, named newbib, say, make - % a local configuration file, natbib.cfg, with the definition - % \newcommand{\bibstyle@newbib}{\bibpunct...} - % Then the \bibliographystyle{newbib} will cause \bibstyle@newbib to - % be called on THE NEXT LATEX RUN (via the aux file). - % - % Such preprogrammed definitions may be invoked anywhere in the text - % by calling \citestyle{newbib}. This is only useful if the style specified - % differs from that in \bibliographystyle. - % - % With \citeindextrue and \citeindexfalse, one can control whether the - % \cite commands make an automatic entry of the citation in the .idx - % indexing file. For this, \makeindex must also be given in the preamble. - % - % Package Options: (for selecting punctuation) - % round - round parentheses are used (default) - % square - square brackets are used [option] - % curly - curly braces are used {option} - % angle - angle brackets are used