shaocongma committed
Commit 9d9ac18
1 Parent(s): d18c569

Add new template.

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. app.py +11 -9
  2. latex_templates/{Summary → Default}/abstract.tex +0 -0
  3. latex_templates/{Summary → Default}/backgrounds.tex +0 -0
  4. latex_templates/{Summary → Default}/conclusion.tex +0 -0
  5. latex_templates/{Summary → Default}/experiments.tex +0 -0
  6. latex_templates/{Summary → Default}/fancyhdr.sty +0 -0
  7. latex_templates/Default/fig.png +0 -0
  8. latex_templates/{Summary → Default}/iclr2022_conference.bst +0 -0
  9. latex_templates/{Summary → Default}/iclr2022_conference.sty +0 -0
  10. latex_templates/{Summary → Default}/introduction.tex +0 -0
  11. latex_templates/{Summary → Default}/math_commands.tex +0 -0
  12. latex_templates/{Summary → Default}/methodology.tex +0 -0
  13. latex_templates/{Summary → Default}/natbib.sty +0 -0
  14. latex_templates/{Summary → Default}/related works.tex +0 -0
  15. {outputs/outputs_20230421_000752 → latex_templates/Default}/template.tex +8 -2
  16. latex_templates/Summary/template.tex +0 -33
  17. latex_templates/example_references.bib +0 -9
  18. outputs/outputs_20230421_000752/abstract.tex +0 -0
  19. outputs/outputs_20230421_000752/backgrounds.tex +0 -20
  20. outputs/outputs_20230421_000752/conclusion.tex +0 -0
  21. outputs/outputs_20230421_000752/experiments.tex +0 -0
  22. outputs/outputs_20230421_000752/fancyhdr.sty +0 -485
  23. outputs/outputs_20230421_000752/generation.log +0 -123
  24. outputs/outputs_20230421_000752/iclr2022_conference.bst +0 -1440
  25. outputs/outputs_20230421_000752/iclr2022_conference.sty +0 -245
  26. outputs/outputs_20230421_000752/introduction.tex +0 -10
  27. outputs/outputs_20230421_000752/main.aux +0 -92
  28. outputs/outputs_20230421_000752/main.bbl +0 -122
  29. outputs/outputs_20230421_000752/main.blg +0 -935
  30. outputs/outputs_20230421_000752/main.log +0 -460
  31. outputs/outputs_20230421_000752/main.out +0 -7
  32. outputs/outputs_20230421_000752/main.pdf +0 -0
  33. outputs/outputs_20230421_000752/main.synctex.gz +0 -0
  34. outputs/outputs_20230421_000752/main.tex +0 -34
  35. outputs/outputs_20230421_000752/math_commands.tex +0 -508
  36. outputs/outputs_20230421_000752/methodology.tex +0 -0
  37. outputs/outputs_20230421_000752/natbib.sty +0 -1246
  38. outputs/outputs_20230421_000752/ref.bib +0 -1908
  39. outputs/outputs_20230421_000752/related works.tex +0 -20
  40. outputs/outputs_20230421_012207/abstract.tex +0 -0
  41. outputs/outputs_20230421_012207/backgrounds.tex +0 -16
  42. outputs/outputs_20230421_012207/conclusion.tex +0 -0
  43. outputs/outputs_20230421_012207/experiments.tex +0 -0
  44. outputs/outputs_20230421_012207/fancyhdr.sty +0 -485
  45. outputs/outputs_20230421_012207/generation.log +0 -105
  46. outputs/outputs_20230421_012207/iclr2022_conference.bst +0 -1440
  47. outputs/outputs_20230421_012207/iclr2022_conference.sty +0 -245
  48. outputs/outputs_20230421_012207/introduction.tex +0 -10
  49. outputs/outputs_20230421_012207/main.aux +0 -79
  50. outputs/outputs_20230421_012207/main.bbl +0 -72
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
import os
import openai
from auto_backgrounds import generate_backgrounds, generate_draft
- from utils.file_operations import hash_name
+ from utils.file_operations import hash_name, list_folders
from references_generator import generate_top_k_references

# todo:
@@ -42,6 +42,8 @@ else:
except Exception as e:
IS_OPENAI_API_KEY_AVAILABLE = False

+ ALL_TEMPLATES = list_folders("latex_templates")
+

def clear_inputs(*args):
return "", ""
@@ -108,7 +110,7 @@ theme = gr.themes.Default(font=gr.themes.GoogleFont("Questrial"))
ACADEMIC_PAPER = """## 一键生成论文初稿

1. 在Title文本框中输入想要生成的论文名称(比如Playing Atari with Deep Reinforcement Learning).
- 2. 点击Submit. 等待大概十分钟.
+ 2. 点击Submit. 等待大概十五分钟(全文).
3. 在右侧下载.zip格式的输出,在Overleaf上编译浏览.
"""

@@ -146,6 +148,10 @@ with gr.Blocks(theme=theme) as demo:
本Demo提供对[Auto-Draft](https://github.com/CCCBora/auto-draft)的auto_draft功能的测试.
通过输入想要生成的论文名称(比如Playing atari with deep reinforcement learning),即可由AI辅助生成论文模板.

+ ***2023-06-08 Update***:
+ * 目前对英文的生成效果更好. 如果需要中文文章可以使用[GPT学术优化](https://github.com/binary-husky/gpt_academic)的`Latex全文翻译、润色`功能.
+ * 支持
+
***2023-05-17 Update***: 我的API的余额用完了, 所以这个月不再能提供GPT-4的API Key. 这里为大家提供了一个位置输入OpenAI API Key. 同时也提供了GPT-3.5的兼容. 欢迎大家自行体验.

如果有更多想法和建议欢迎加入QQ群里交流, 如果我在Space里更新了Key我会第一时间通知大家. 群号: ***249738228***.
@@ -170,9 +176,9 @@ with gr.Blocks(theme=theme) as demo:
description_pp = gr.Textbox(lines=5, label="Description (Optional)", visible=True,
info="对希望生成的论文的一些描述. 包括这篇论文的创新点, 主要贡献, 等.")
with gr.Row():
- template = gr.Dropdown(label="Template", choices=["ICLR2022"], value="ICLR2022",
- interactive=False,
- info="生成论文的参考模板. (暂不支持修改)")
+ template = gr.Dropdown(label="Template", choices=ALL_TEMPLATES, value="Default",
+ interactive=True,
+ info="生成论文的参考模板.")
model_selection = gr.Dropdown(label="Model", choices=["gpt-4", "gpt-3.5-turbo"],
value="gpt-3.5-turbo",
interactive=True,
@@ -202,10 +208,6 @@ with gr.Blocks(theme=theme) as demo:
''')
bibtex_file = gr.File(label="Upload .bib file", file_types=["text"],
interactive=True)
- gr.Examples(
- examples=["latex_templates/example_references.bib"],
- inputs=bibtex_file
- )

with gr.Row():
with gr.Column(scale=1):
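The new Template dropdown above is populated from `ALL_TEMPLATES = list_folders("latex_templates")`, but the helper itself is not part of this diff. A minimal sketch of what `utils.file_operations.list_folders` could look like is shown below; this is an assumption for illustration, not the repository's actual implementation.

```python
import os

def list_folders(path: str) -> list:
    """Return the names of the sub-directories of `path` (e.g. the available LaTeX templates)."""
    return [name for name in os.listdir(path)
            if os.path.isdir(os.path.join(path, name))]

# Hypothetical usage mirroring the diff above:
# ALL_TEMPLATES = list_folders("latex_templates")   # e.g. ["Default", ...]
```

With a helper of this shape, any folder added under latex_templates/ (such as the new Default template in this commit) would automatically appear as a dropdown choice.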
latex_templates/{Summary → Default}/abstract.tex RENAMED
File without changes
latex_templates/{Summary → Default}/backgrounds.tex RENAMED
File without changes
latex_templates/{Summary → Default}/conclusion.tex RENAMED
File without changes
latex_templates/{Summary → Default}/experiments.tex RENAMED
File without changes
latex_templates/{Summary → Default}/fancyhdr.sty RENAMED
File without changes
latex_templates/Default/fig.png ADDED
latex_templates/{Summary → Default}/iclr2022_conference.bst RENAMED
File without changes
latex_templates/{Summary → Default}/iclr2022_conference.sty RENAMED
File without changes
latex_templates/{Summary → Default}/introduction.tex RENAMED
File without changes
latex_templates/{Summary → Default}/math_commands.tex RENAMED
File without changes
latex_templates/{Summary → Default}/methodology.tex RENAMED
File without changes
latex_templates/{Summary → Default}/natbib.sty RENAMED
File without changes
latex_templates/{Summary → Default}/related works.tex RENAMED
File without changes
{outputs/outputs_20230421_000752 → latex_templates/Default}/template.tex RENAMED
@@ -2,11 +2,17 @@
\UseRawInputEncoding
\usepackage{graphicx}
\usepackage{booktabs}
- \usepackage{iclr2022_conference, times}
+ \usepackage{times}
+ \usepackage{eso-pic} % used by \AddToShipoutPicture
+ \RequirePackage{fancyhdr}
+ \RequirePackage{natbib}
+ \usepackage{fullpage}
+
\input{math_commands.tex}
\usepackage{hyperref}
\usepackage{url}
- \usepackage{algorithmicx}
+ \usepackage{algorithm}
+ \usepackage{algpseudocode}

\title{TITLE}
\author{GPT-4}
latex_templates/Summary/template.tex DELETED
@@ -1,33 +0,0 @@
- \documentclass{article} % For LaTeX2e
- \UseRawInputEncoding
- \usepackage{graphicx}
- \usepackage{booktabs}
- \input{math_commands.tex}
- \usepackage{hyperref}
- \usepackage{url}
- \usepackage{algorithmicx}
-
- \title{TITLE}
- \author{GPT-4}
-
- \newcommand{\fix}{\marginpar{FIX}}
- \newcommand{\new}{\marginpar{NEW}}
-
- \begin{document}
- \maketitle
- \input{abstract.tex}
- \input{introduction.tex}
- \input{related works.tex}
- \input{backgrounds.tex}
- \input{methodology.tex}
- \input{experiments.tex}
- \input{conclusion.tex}
-
- \bibliography{ref}
- \bibliographystyle{abbrv}
-
- %\appendix
- %\section{Appendix}
- %You may include other additional sections here.
-
- \end{document}
latex_templates/example_references.bib DELETED
@@ -1,9 +0,0 @@
- @inproceedings{ma2020understanding,
- title={Understanding the impact of model incoherence on convergence of incremental sgd with random reshuffle},
- author={Ma, Shaocong and Zhou, Yi},
- booktitle={International Conference on Machine Learning},
- pages={6565--6574},
- year={2020},
- organization={PMLR},
- abstract={Although SGD with random reshuffle has been widely-used in machine learning applications, there is a limited understanding of how model characteristics affect the convergence of the algorithm. In this work, we introduce model incoherence to characterize the diversity of model characteristics and study its impact on convergence of SGD with random reshuffle under weak strong convexity. Specifically, minimizer incoherence measures the discrepancy between the global minimizers of a sample loss and those of the total loss and affects the convergence error of SGD with random reshuffle. In particular, we show that the variable sequence generated by SGD with random reshuffle converges to a certain global minimizer of the total loss under full minimizer coherence. The other curvature incoherence measures the quality of condition numbers of the sample losses and determines the convergence rate of SGD. With model incoherence, our results show that SGD has a faster convergence rate and smaller convergence error under random reshuffle than those under random sampling, and hence provide justifications to the superior practical performance of SGD with random reshuffle.}
- }
outputs/outputs_20230421_000752/abstract.tex DELETED
File without changes
outputs/outputs_20230421_000752/backgrounds.tex DELETED
@@ -1,20 +0,0 @@
- \section{backgrounds}
- \subsection{Problem Statement and Foundational Concepts}
-
- Reinforcement Learning (RL) is a subfield of machine learning that focuses on training agents to make decisions in an environment to maximize a cumulative reward signal. In RL, an agent interacts with an environment through a sequence of actions, observations, and rewards, aiming to learn an optimal policy that maps states to actions \cite{1512.09075}. The problem can be formalized as a Markov Decision Process (MDP), which is defined by a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{1511.02377}. The goal of RL is to find a policy $\pi(a|s)$ that maximizes the expected cumulative reward, defined as $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $R_{t+k+1}$ is the reward received at time step $t+k+1$ \cite{1512.07669}.
-
- \subsection{Q-Learning and Related Algorithms}
-
- Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected cumulative reward of taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by:
-
- \[Q(s, a) \leftarrow Q(s, a) + \alpha \left[ R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a) \right],\]
-
- where $\alpha$ is the learning rate, $R(s, a)$ is the reward for taking action $a$ in state $s$, and $s'$ is the next state \cite{2303.08631}. However, Q-learning can suffer from overestimation bias, which can lead to suboptimal performance \cite{2106.14642}. To address this issue, Double Q-learning was proposed, which uses two separate Q-value estimators and updates them alternately, mitigating overestimation bias while maintaining convergence guarantees \cite{2303.08631}. Another variant, Expert Q-learning, incorporates semi-supervised learning by splitting Q-values into state values and action advantages, and using an expert network to assess the value of states \cite{2106.14642}.
-
- \subsection{Policy Gradient Methods}
-
- Policy gradient methods are another class of RL algorithms that optimize the policy directly by estimating the gradient of the expected cumulative reward with respect to the policy parameters \cite{1703.02102}. The policy gradient theorem provides a simplified form for the gradient, which can be used to derive on-policy and off-policy algorithms \cite{1811.09013}. Natural policy gradients, which incorporate second-order information to improve convergence, form the foundation for state-of-the-art algorithms like Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}.
-
- \subsection{Methodology and Evaluation Metrics}
-
- In this paper, we will explore various RL algorithms, focusing on Q-learning and its variants, as well as policy gradient methods. We will delve into their theoretical foundations, convergence properties, and practical limitations. To assess the performance of these algorithms, we will use evaluation metrics such as cumulative reward, convergence speed, and sample efficiency. By comparing the performance of different algorithms, we aim to provide insights into their strengths and weaknesses, and identify potential areas for improvement and future research directions.
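The deleted backgrounds.tex above states the standard tabular Q-learning update rule. Purely as an illustration of that formula (not code from this repository), a minimal, self-contained Python sketch of one update step is:

```python
def q_learning_update(Q, s, a, r, s_next, actions, alpha=0.1, gamma=0.99):
    """One tabular Q-learning step: Q(s,a) <- Q(s,a) + alpha * [r + gamma * max_a' Q(s',a') - Q(s,a)]."""
    best_next = max(Q.get((s_next, a2), 0.0) for a2 in actions)
    td_error = r + gamma * best_next - Q.get((s, a), 0.0)
    Q[(s, a)] = Q.get((s, a), 0.0) + alpha * td_error
    return Q

# Toy usage with made-up states and actions (illustration only):
Q = {}
Q = q_learning_update(Q, s="s0", a="left", r=1.0, s_next="s1", actions=["left", "right"])
print(Q[("s0", "left")])  # 0.1 with the default alpha=0.1
```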
outputs/outputs_20230421_000752/conclusion.tex DELETED
File without changes
outputs/outputs_20230421_000752/experiments.tex DELETED
File without changes
outputs/outputs_20230421_000752/fancyhdr.sty DELETED
@@ -1,485 +0,0 @@
1
- % fancyhdr.sty version 3.2
2
- % Fancy headers and footers for LaTeX.
3
- % Piet van Oostrum,
4
- % Dept of Computer and Information Sciences, University of Utrecht,
5
- % Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
6
- % Telephone: +31 30 2532180. Email: piet@cs.uu.nl
7
- % ========================================================================
8
- % LICENCE:
9
- % This file may be distributed under the terms of the LaTeX Project Public
10
- % License, as described in lppl.txt in the base LaTeX distribution.
11
- % Either version 1 or, at your option, any later version.
12
- % ========================================================================
13
- % MODIFICATION HISTORY:
14
- % Sep 16, 1994
15
- % version 1.4: Correction for use with \reversemargin
16
- % Sep 29, 1994:
17
- % version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands
18
- % Oct 4, 1994:
19
- % version 1.6: Reset single spacing in headers/footers for use with
20
- % setspace.sty or doublespace.sty
21
- % Oct 4, 1994:
22
- % version 1.7: changed \let\@mkboth\markboth to
23
- % \def\@mkboth{\protect\markboth} to make it more robust
24
- % Dec 5, 1994:
25
- % version 1.8: corrections for amsbook/amsart: define \@chapapp and (more
26
- % importantly) use the \chapter/sectionmark definitions from ps@headings if
27
- % they exist (which should be true for all standard classes).
28
- % May 31, 1995:
29
- % version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage...
30
- % construction in the doc did not work properly with the fancyplain style.
31
- % June 1, 1995:
32
- % version 1.91: The definition of \@mkboth wasn't restored on subsequent
33
- % \pagestyle{fancy}'s.
34
- % June 1, 1995:
35
- % version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain}
36
- % \pagestyle{fancy} would erroneously select the plain version.
37
- % June 1, 1995:
38
- % version 1.93: \fancypagestyle command added.
39
- % Dec 11, 1995:
40
- % version 1.94: suggested by Conrad Hughes <chughes@maths.tcd.ie>
41
- % CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule
42
- % position (old hardcoded value of .3\normalbaselineskip is far too high
43
- % when used with very small footer fonts).
44
- % Jan 31, 1996:
45
- % version 1.95: call \@normalsize in the reset code if that is defined,
46
- % otherwise \normalsize.
47
- % this is to solve a problem with ucthesis.cls, as this doesn't
48
- % define \@currsize. Unfortunately for latex209 calling \normalsize doesn't
49
- % work as this is optimized to do very little, so there \@normalsize should
50
- % be called. Hopefully this code works for all versions of LaTeX known to
51
- % mankind.
52
- % April 25, 1996:
53
- % version 1.96: initialize \headwidth to a magic (negative) value to catch
54
- % most common cases that people change it before calling \pagestyle{fancy}.
55
- % Note it can't be initialized when reading in this file, because
56
- % \textwidth could be changed afterwards. This is quite probable.
57
- % We also switch to \MakeUppercase rather than \uppercase and introduce a
58
- % \nouppercase command for use in headers. and footers.
59
- % May 3, 1996:
60
- % version 1.97: Two changes:
61
- % 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults
62
- % for the chapter and section marks. The current version of amsbook and
63
- % amsart classes don't seem to need them anymore. Moreover the standard
64
- % latex classes don't use \markboth if twoside isn't selected, and this is
65
- % confusing as \leftmark doesn't work as expected.
66
- % 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem
67
- % in the amsbook and amsart classes, that make global changes to \topskip,
68
- % which are reset in \ps@empty. Hopefully this doesn't break other things.
69
- % May 7, 1996:
70
- % version 1.98:
71
- % Added % after the line \def\nouppercase
72
- % May 7, 1996:
73
- % version 1.99: This is the alpha version of fancyhdr 2.0
74
- % Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf.
75
- % Changed \headrulewidth, \footrulewidth, \footruleskip to
76
- % macros rather than length parameters, In this way they can be
77
- % conditionalized and they don't consume length registers. There is no need
78
- % to have them as length registers unless you want to do calculations with
79
- % them, which is unlikely. Note that this may make some uses of them
80
- % incompatible (i.e. if you have a file that uses \setlength or \xxxx=)
81
- % May 10, 1996:
82
- % version 1.99a:
83
- % Added a few more % signs
84
- % May 10, 1996:
85
- % version 1.99b:
86
- % Changed the syntax of \f@nfor to be resistent to catcode changes of :=
87
- % Removed the [1] from the defs of \lhead etc. because the parameter is
88
- % consumed by the \@[xy]lhead etc. macros.
89
- % June 24, 1997:
90
- % version 1.99c:
91
- % corrected \nouppercase to also include the protected form of \MakeUppercase
92
- % \global added to manipulation of \headwidth.
93
- % \iffootnote command added.
94
- % Some comments added about \@fancyhead and \@fancyfoot.
95
- % Aug 24, 1998
96
- % version 1.99d
97
- % Changed the default \ps@empty to \ps@@empty in order to allow
98
- % \fancypagestyle{empty} redefinition.
99
- % Oct 11, 2000
100
- % version 2.0
101
- % Added LPPL license clause.
102
- %
103
- % A check for \headheight is added. An errormessage is given (once) if the
104
- % header is too large. Empty headers don't generate the error even if
105
- % \headheight is very small or even 0pt.
106
- % Warning added for the use of 'E' option when twoside option is not used.
107
- % In this case the 'E' fields will never be used.
108
- %
109
- % Mar 10, 2002
110
- % version 2.1beta
111
- % New command: \fancyhfoffset[place]{length}
112
- % defines offsets to be applied to the header/footer to let it stick into
113
- % the margins (if length > 0).
114
- % place is like in fancyhead, except that only E,O,L,R can be used.
115
- % This replaces the old calculation based on \headwidth and the marginpar
116
- % area.
117
- % \headwidth will be dynamically calculated in the headers/footers when
118
- % this is used.
119
- %
120
- % Mar 26, 2002
121
- % version 2.1beta2
122
- % \fancyhfoffset now also takes h,f as possible letters in the argument to
123
- % allow the header and footer widths to be different.
124
- % New commands \fancyheadoffset and \fancyfootoffset added comparable to
125
- % \fancyhead and \fancyfoot.
126
- % Errormessages and warnings have been made more informative.
127
- %
128
- % Dec 9, 2002
129
- % version 2.1
130
- % The defaults for \footrulewidth, \plainheadrulewidth and
131
- % \plainfootrulewidth are changed from \z@skip to 0pt. In this way when
132
- % someone inadvertantly uses \setlength to change any of these, the value
133
- % of \z@skip will not be changed, rather an errormessage will be given.
134
-
135
- % March 3, 2004
136
- % Release of version 3.0
137
-
138
- % Oct 7, 2004
139
- % version 3.1
140
- % Added '\endlinechar=13' to \fancy@reset to prevent problems with
141
- % includegraphics in header when verbatiminput is active.
142
-
143
- % March 22, 2005
144
- % version 3.2
145
- % reset \everypar (the real one) in \fancy@reset because spanish.ldf does
146
- % strange things with \everypar between << and >>.
147
-
148
- \def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty}
149
-
150
- \def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else
151
- \fancy@gbl\def#1{#2\strut}\fi}
152
-
153
- \let\fancy@gbl\global
154
-
155
- \def\@fancyerrmsg#1{%
156
- \ifx\PackageError\undefined
157
- \errmessage{#1}\else
158
- \PackageError{Fancyhdr}{#1}{}\fi}
159
- \def\@fancywarning#1{%
160
- \ifx\PackageWarning\undefined
161
- \errmessage{#1}\else
162
- \PackageWarning{Fancyhdr}{#1}{}\fi}
163
-
164
- % Usage: \@forc \var{charstring}{command to be executed for each char}
165
- % This is similar to LaTeX's \@tfor, but expands the charstring.
166
-
167
- \def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}}
168
- \def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else
169
- \f@@rc#1#2\f@@rc{#3}\fi}
170
- \def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}}
171
-
172
- % Usage: \f@nfor\name:=list\do{body}
173
- % Like LaTeX's \@for but an empty list is treated as a list with an empty
174
- % element
175
-
176
- \newcommand{\f@nfor}[3]{\edef\@fortmp{#2}%
177
- \expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}}
178
-
179
- % Usage: \def@ult \cs{defaults}{argument}
180
- % sets \cs to the characters from defaults appearing in argument
181
- % or defaults if it would be empty. All characters are lowercased.
182
-
183
- \newcommand\def@ult[3]{%
184
- \edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a
185
- \def#1{}%
186
- \@forc\tmpf@ra{#2}%
187
- {\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}%
188
- \ifx\@empty#1\def#1{#2}\fi}
189
- %
190
- % \if@in <char><set><truecase><falsecase>
191
- %
192
- \newcommand{\if@in}[4]{%
193
- \edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}%
194
- \expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi}
195
-
196
- \newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}%
197
- {\f@ncyhf\fancyhead h[]}}
198
- \newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}%
199
- {\f@ncyhf\fancyfoot f[]}}
200
- \newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}%
201
- {\f@ncyhf\fancyhf{}[]}}
202
-
203
- % New commands for offsets added
204
-
205
- \newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}%
206
- {\f@ncyhfoffs\fancyheadoffset h[]}}
207
- \newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}%
208
- {\f@ncyhfoffs\fancyfootoffset f[]}}
209
- \newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}%
210
- {\f@ncyhfoffs\fancyhfoffset{}[]}}
211
-
212
- % The header and footer fields are stored in command sequences with
213
- % names of the form: \f@ncy<x><y><z> with <x> for [eo], <y> from [lcr]
214
- % and <z> from [hf].
215
-
216
- \def\f@ncyhf#1#2[#3]#4{%
217
- \def\temp@c{}%
218
- \@forc\tmpf@ra{#3}%
219
- {\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}%
220
- {}{\edef\temp@c{\temp@c\tmpf@ra}}}%
221
- \ifx\@empty\temp@c\else
222
- \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
223
- [#3]}%
224
- \fi
225
- \f@nfor\temp@c{#3}%
226
- {\def@ult\f@@@eo{eo}\temp@c
227
- \if@twoside\else
228
- \if\f@@@eo e\@fancywarning
229
- {\string#1's `E' option without twoside option is useless}\fi\fi
230
- \def@ult\f@@@lcr{lcr}\temp@c
231
- \def@ult\f@@@hf{hf}{#2\temp@c}%
232
- \@forc\f@@eo\f@@@eo
233
- {\@forc\f@@lcr\f@@@lcr
234
- {\@forc\f@@hf\f@@@hf
235
- {\expandafter\fancy@def\csname
236
- f@ncy\f@@eo\f@@lcr\f@@hf\endcsname
237
- {#4}}}}}}
238
-
239
- \def\f@ncyhfoffs#1#2[#3]#4{%
240
- \def\temp@c{}%
241
- \@forc\tmpf@ra{#3}%
242
- {\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}%
243
- {}{\edef\temp@c{\temp@c\tmpf@ra}}}%
244
- \ifx\@empty\temp@c\else
245
- \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
246
- [#3]}%
247
- \fi
248
- \f@nfor\temp@c{#3}%
249
- {\def@ult\f@@@eo{eo}\temp@c
250
- \if@twoside\else
251
- \if\f@@@eo e\@fancywarning
252
- {\string#1's `E' option without twoside option is useless}\fi\fi
253
- \def@ult\f@@@lcr{lr}\temp@c
254
- \def@ult\f@@@hf{hf}{#2\temp@c}%
255
- \@forc\f@@eo\f@@@eo
256
- {\@forc\f@@lcr\f@@@lcr
257
- {\@forc\f@@hf\f@@@hf
258
- {\expandafter\setlength\csname
259
- f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname
260
- {#4}}}}}%
261
- \fancy@setoffs}
262
-
263
- % Fancyheadings version 1 commands. These are more or less deprecated,
264
- % but they continue to work.
265
-
266
- \newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}}
267
- \def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}}
268
- \def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}}
269
-
270
- \newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}}
271
- \def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}}
272
- \def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}}
273
-
274
- \newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}}
275
- \def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}}
276
- \def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}}
277
-
278
- \newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}}
279
- \def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}}
280
- \def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}}
281
-
282
- \newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}}
283
- \def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}}
284
- \def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}}
285
-
286
- \newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}}
287
- \def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}}
288
- \def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}}
289
-
290
- \newlength{\fancy@headwidth}
291
- \let\headwidth\fancy@headwidth
292
- \newlength{\f@ncyO@elh}
293
- \newlength{\f@ncyO@erh}
294
- \newlength{\f@ncyO@olh}
295
- \newlength{\f@ncyO@orh}
296
- \newlength{\f@ncyO@elf}
297
- \newlength{\f@ncyO@erf}
298
- \newlength{\f@ncyO@olf}
299
- \newlength{\f@ncyO@orf}
300
- \newcommand{\headrulewidth}{0.4pt}
301
- \newcommand{\footrulewidth}{0pt}
302
- \newcommand{\footruleskip}{.3\normalbaselineskip}
303
-
304
- % Fancyplain stuff shouldn't be used anymore (rather
305
- % \fancypagestyle{plain} should be used), but it must be present for
306
- % compatibility reasons.
307
-
308
- \newcommand{\plainheadrulewidth}{0pt}
309
- \newcommand{\plainfootrulewidth}{0pt}
310
- \newif\if@fancyplain \@fancyplainfalse
311
- \def\fancyplain#1#2{\if@fancyplain#1\else#2\fi}
312
-
313
- \headwidth=-123456789sp %magic constant
314
-
315
- % Command to reset various things in the headers:
316
- % a.o. single spacing (taken from setspace.sty)
317
- % and the catcode of ^^M (so that epsf files in the header work if a
318
- % verbatim crosses a page boundary)
319
- % It also defines a \nouppercase command that disables \uppercase and
320
- % \Makeuppercase. It can only be used in the headers and footers.
321
- \let\fnch@everypar\everypar% save real \everypar because of spanish.ldf
322
- \def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13
323
- \def\baselinestretch{1}%
324
- \def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax
325
- \expandafter\let\csname MakeUppercase \endcsname\relax##1}}%
326
- \ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e
327
- \ifx\@normalsize\undefined \normalsize % for ucthesis.cls
328
- \else \@normalsize \fi
329
- \else% NFSS (2.09) present
330
- \@newbaseline%
331
- \fi}
332
-
333
- % Initialization of the head and foot text.
334
-
335
- % The default values still contain \fancyplain for compatibility.
336
- \fancyhf{} % clear all
337
- % lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages
338
- % evenhead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages
339
- \if@twoside
340
- \fancyhead[el,or]{\fancyplain{}{\sl\rightmark}}
341
- \fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}}
342
- \else
343
- \fancyhead[l]{\fancyplain{}{\sl\rightmark}}
344
- \fancyhead[r]{\fancyplain{}{\sl\leftmark}}
345
- \fi
346
- \fancyfoot[c]{\rm\thepage} % page number
347
-
348
- % Use box 0 as a temp box and dimen 0 as temp dimen.
349
- % This can be done, because this code will always
350
- % be used inside another box, and therefore the changes are local.
351
-
352
- \def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning
353
- {\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J
354
- We now make it that large for the rest of the document.^^J
355
- This may cause the page layout to be inconsistent, however\@gobble}%
356
- \dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi
357
- \box0}
358
-
359
- % Put together a header or footer given the left, center and
360
- % right text, fillers at left and right and a rule.
361
- % The \lap commands put the text into an hbox of zero size,
362
- % so overlapping text does not generate an errormessage.
363
- % These macros have 5 parameters:
364
- % 1. LEFTSIDE BEARING % This determines at which side the header will stick
365
- % out. When \fancyhfoffset is used this calculates \headwidth, otherwise
366
- % it is \hss or \relax (after expansion).
367
- % 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component.
368
- % 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp.
369
- % 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component.
370
- % 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion).
371
-
372
- \def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
373
- \@fancyvbox\headheight{\hbox
374
- {\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill
375
- \parbox[b]{\headwidth}{\centering#3}\hfill
376
- \llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5}
377
-
378
- \def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
379
- \@fancyvbox\footskip{\footrule
380
- \hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill
381
- \parbox[t]{\headwidth}{\centering#3}\hfill
382
- \llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5}
383
-
384
- \def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi
385
- \hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}}
386
-
387
- \def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi
388
- \vskip-\footruleskip\vskip-\footrulewidth
389
- \hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}}
390
-
391
- \def\ps@fancy{%
392
- \@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook
393
- %
394
- % Define \MakeUppercase for old LaTeXen.
395
- % Note: we used \def rather than \let, so that \let\uppercase\relax (from
396
- % the version 1 documentation) will still work.
397
- %
398
- \@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}%
399
- \@ifundefined{chapter}{\def\sectionmark##1{\markboth
400
- {\MakeUppercase{\ifnum \c@secnumdepth>\z@
401
- \thesection\hskip 1em\relax \fi ##1}}{}}%
402
- \def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne
403
- \thesubsection\hskip 1em\relax \fi ##1}}}%
404
- {\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne
405
- \@chapapp\ \thechapter. \ \fi ##1}}{}}%
406
- \def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@
407
- \thesection. \ \fi ##1}}}}%
408
- %\csname ps@headings\endcsname % use \ps@headings defaults if they exist
409
- \ps@@fancy
410
- \gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}%
411
- % Initialize \headwidth if the user didn't
412
- %
413
- \ifdim\headwidth<0sp
414
- %
415
- % This catches the case that \headwidth hasn't been initialized and the
416
- % case that the user added something to \headwidth in the expectation that
417
- % it was initialized to \textwidth. We compensate this now. This loses if
418
- % the user intended to multiply it by a factor. But that case is more
419
- % likely done by saying something like \headwidth=1.2\textwidth.
420
- % The doc says you have to change \headwidth after the first call to
421
- % \pagestyle{fancy}. This code is just to catch the most common cases were
422
- % that requirement is violated.
423
- %
424
- \global\advance\headwidth123456789sp\global\advance\headwidth\textwidth
425
- \fi}
426
- \def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy}
427
- \def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy}
428
- \let\ps@@empty\ps@empty
429
- \def\ps@@fancy{%
430
- \ps@@empty % This is for amsbook/amsart, which do strange things with \topskip
431
- \def\@mkboth{\protect\markboth}%
432
- \def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}%
433
- \def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}%
434
- \def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}%
435
- \def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}%
436
- }
437
- % Default definitions for compatibility mode:
438
- % These cause the header/footer to take the defined \headwidth as width
439
- % And to shift in the direction of the marginpar area
440
-
441
- \def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi}
442
- \def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi}
443
- \let\fancy@Oelh\fancy@Oorh
444
- \let\fancy@Oerh\fancy@Oolh
445
-
446
- \let\fancy@Oolf\fancy@Oolh
447
- \let\fancy@Oorf\fancy@Oorh
448
- \let\fancy@Oelf\fancy@Oelh
449
- \let\fancy@Oerf\fancy@Oerh
450
-
451
- % New definitions for the use of \fancyhfoffset
452
- % These calculate the \headwidth from \textwidth and the specified offsets.
453
-
454
- \def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh
455
- \advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh}
456
- \def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh
457
- \advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh}
458
-
459
- \def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf
460
- \advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf}
461
- \def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf
462
- \advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf}
463
-
464
- \def\fancy@setoffs{%
465
- % Just in case \let\headwidth\textwidth was used
466
- \fancy@gbl\let\headwidth\fancy@headwidth
467
- \fancy@gbl\let\fancy@Oolh\fancy@offsolh
468
- \fancy@gbl\let\fancy@Oelh\fancy@offselh
469
- \fancy@gbl\let\fancy@Oorh\hss
470
- \fancy@gbl\let\fancy@Oerh\hss
471
- \fancy@gbl\let\fancy@Oolf\fancy@offsolf
472
- \fancy@gbl\let\fancy@Oelf\fancy@offself
473
- \fancy@gbl\let\fancy@Oorf\hss
474
- \fancy@gbl\let\fancy@Oerf\hss}
475
-
476
- \newif\iffootnote
477
- \let\latex@makecol\@makecol
478
- \def\@makecol{\ifvoid\footins\footnotetrue\else\footnotefalse\fi
479
- \let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol}
480
- \def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi}
481
- \def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi}
482
- \def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi}
483
-
484
- \newcommand{\fancypagestyle}[2]{%
485
- \@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}}
outputs/outputs_20230421_000752/generation.log DELETED
@@ -1,123 +0,0 @@
1
- INFO:utils.gpt_interaction:{"Markov Decision Process": 5, "Q-Learning": 4, "Policy Gradient": 4, "Deep Reinforcement Learning": 5, "Temporal Difference": 3}
2
- INFO:root:For generating keywords, 119 tokens have been used (79 for prompts; 40 for completion). 119 tokens have been used in total.
3
- INFO:utils.prompts:Generated prompts for introduction: I am writing a machine learning survey about 'Reinforcement Learning'.
4
- You need to write the introduction section. Please include five paragraph: Establishing the motivation for the research. Explaining its importance and relevance to the AI community. Clearly state the problem you're addressing, your proposed solution, and the specific research questions or objectives. Briefly mention key related work for context. Explain the main differences from your work.
5
- Please read the following references:
6
- {'1512.07669': ' This article presents a short and concise description of stochastic\napproximation algorithms in reinforcement learning of Markov decision\nprocesses. The algorithms can also be used as a suboptimal method for partially\nobserved Markov decision processes.\n', '1511.02377': ' We provide a full characterization of the set of value functions of Markov\ndecision processes.\n', '1512.09075': ' This paper specifies a notation for Markov decision processes.\n', '2008.10426': ' Decisiveness has proven to be an elegant concept for denumerable Markov\nchains: it is general enough to encompass several natural classes of\ndenumerable Markov chains, and is a sufficient condition for simple qualitative\nand approximate quantitative model checking algorithms to exist. In this paper,\nwe explore how to extend the notion of decisiveness to Markov decision\nprocesses. Compared to Markov chains, the extra non-determinism can be resolved\nin an adversarial or cooperative way, yielding two natural notions of\ndecisiveness. We then explore whether these notions yield model checking\nprocedures concerning the infimum and supremum probabilities of reachability\nproperties.\n', '0711.2185': ' For a countable-state Markov decision process we introduce an embedding which\nproduces a finite-state Markov decision process. The finite-state embedded\nprocess has the same optimal cost, and moreover, it has the same dynamics as\nthe original process when restricting to the approximating set. The embedded\nprocess can be used as an approximation which, being finite, is more convenient\nfor computation and implementation.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. 
The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). 
This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. 
Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1708.05866': ' Deep reinforcement learning is poised to revolutionise the field of AI and\nrepresents a step towards building autonomous systems with a higher level\nunderstanding of the visual world. Currently, deep learning is enabling\nreinforcement learning to scale to problems that were previously intractable,\nsuch as learning to play video games directly from pixels. Deep reinforcement\nlearning algorithms are also applied to robotics, allowing control policies for\nrobots to be learned directly from camera inputs in the real world. 
In this\nsurvey, we begin with an introduction to the general field of reinforcement\nlearning, then progress to the main streams of value-based and policy-based\nmethods. Our survey will cover central algorithms in deep reinforcement\nlearning, including the deep $Q$-network, trust region policy optimisation, and\nasynchronous advantage actor-critic. In parallel, we highlight the unique\nadvantages of deep neural networks, focusing on visual understanding via\nreinforcement learning. To conclude, we describe several current areas of\nresearch within the field.\n', '1906.10025': ' Recent advances in Reinforcement Learning, grounded on combining classical\ntheoretical results with Deep Learning paradigm, led to breakthroughs in many\nartificial intelligence tasks and gave birth to Deep Reinforcement Learning\n(DRL) as a field of research. In this work latest DRL algorithms are reviewed\nwith a focus on their theoretical justification, practical limitations and\nobserved empirical properties.\n', '2111.01334': ' Quantifying the structural and functional differences of temporal networks is\na fundamental and challenging problem in the era of big data. This work\nproposes a temporal dissimilarity measure for temporal network comparison based\non the fastest arrival distance distribution and spectral entropy based\nJensen-Shannon divergence. Experimental results on both synthetic and empirical\ntemporal networks show that the proposed measure could discriminate diverse\ntemporal networks with different structures by capturing various topological\nand temporal properties. Moreover, the proposed measure can discern the\nfunctional distinctions and is found effective applications in temporal network\nclassification and spreadability discrimination.\n', '2110.06553': ' Electroencephalography (EEG) is a popular and effective tool for emotion\nrecognition. However, the propagation mechanisms of EEG in the human brain and\nits intrinsic correlation with emotions are still obscure to researchers. This\nwork proposes four variant transformer frameworks~(spatial attention, temporal\nattention, sequential spatial-temporal attention and simultaneous\nspatial-temporal attention) for EEG emotion recognition to explore the\nrelationship between emotion and spatial-temporal EEG features. Specifically,\nspatial attention and temporal attention are to learn the topological structure\ninformation and time-varying EEG characteristics for emotion recognition\nrespectively. Sequential spatial-temporal attention does the spatial attention\nwithin a one-second segment and temporal attention within one sample\nsequentially to explore the influence degree of emotional stimulation on EEG\nsignals of diverse EEG electrodes in the same temporal segment. The\nsimultaneous spatial-temporal attention, whose spatial and temporal attention\nare performed simultaneously, is used to model the relationship between\ndifferent spatial features in different time segments. The experimental results\ndemonstrate that simultaneous spatial-temporal attention leads to the best\nemotion recognition accuracy among the design choices, indicating modeling the\ncorrelation of spatial and temporal features of EEG signals is significant to\nemotion recognition.\n', '2007.04828': ' Links in most real networks often change over time. Such temporality of links\nencodes the ordering and causality of interactions between nodes and has a\nprofound effect on network dynamics and function. 
Empirical evidences have\nshown that the temporal nature of links in many real-world networks is not\nrandom. Nonetheless, it is challenging to predict temporal link patterns while\nconsidering the entanglement between topological and temporal link patterns.\nHere we propose an entropy-rate based framework, based on combined\ntopological-temporal regularities, for quantifying the predictability of any\ntemporal network. We apply our framework on various model networks,\ndemonstrating that it indeed captures the intrinsic topological-temporal\nregularities whereas previous methods considered only temporal aspects. We also\napply our framework on 18 real networks of different types and determine their\npredictability. Interestingly, we find that for most real temporal networks,\ndespite the greater complexity of predictability brought by the increase in\ndimension the combined topological-temporal predictability is higher than the\ntemporal predictability. Our results demonstrate the necessity of incorporating\nboth temporal and topological aspects of networks in order to improve\npredictions of dynamical processes.\n'}
7
- Every time you use information from the references, you need to cite its ID at the end of the sentence; for example, a sentence that uses information from 1905.09788 should end with \cite{1905.09788}. Please avoid citing the same reference more than once in the same paragraph.
8
- Put your response (do not include \section{...}) in the following Python script: with open("introduction.tex", "w") as f: f.write(r'''your_response''')
9
- INFO:utils.gpt_interaction:Reinforcement Learning (RL) has emerged as a significant research area in the field of artificial intelligence, with a wide range of applications in robotics, finance, healthcare, and gaming \cite{2108.11510}. The primary goal of RL is to develop algorithms that allow agents to learn optimal policies through interaction with their environment, maximizing the cumulative reward over time \cite{1708.05866}. Despite the considerable progress made in recent years, RL still faces several challenges, such as the trade-off between exploration and exploitation, the curse of dimensionality, and the need for efficient algorithms that can handle large-scale and complex problems \cite{1906.10025}.
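A minimal Python sketch of epsilon-greedy action selection, one common way to handle the exploration-exploitation trade-off mentioned above (the Q-values and epsilon below are assumed example inputs):

import random

# Illustrative sketch: epsilon-greedy action selection over a list of Q-values.
# The Q-values and epsilon are assumed example inputs.
def epsilon_greedy(q_values, epsilon=0.1):
    if random.random() < epsilon:
        return random.randrange(len(q_values))                   # explore
    return max(range(len(q_values)), key=q_values.__getitem__)   # exploit

action = epsilon_greedy([0.3, 0.8, 0.1], epsilon=0.05)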
10
-
11
- One of the major breakthroughs in RL has been the development of Q-learning algorithms, which have been proven to converge to the optimal solution \cite{2303.08631}. However, Q-learning is known to suffer from overestimation bias, leading to suboptimal performance and slow convergence in some cases \cite{2106.14642}. To address this issue, researchers have proposed various modifications and extensions to Q-learning, such as Double Q-learning \cite{1511.02377} and Self-correcting Q-learning \cite{2012.01100}, which aim to mitigate the overestimation bias while maintaining convergence guarantees.
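A tabular sketch of the Double Q-learning idea cited above, in which two estimators are updated in alternation so that action selection and action evaluation use different tables (hyperparameters and state/action encodings are assumed):

import random
from collections import defaultdict

# Illustrative sketch: one tabular Double Q-learning update step.
# alpha, gamma, and the (state, action) key encoding are assumed.
alpha, gamma = 0.1, 0.99
Q_A, Q_B = defaultdict(float), defaultdict(float)

def double_q_update(s, a, r, s_next, actions):
    if random.random() < 0.5:
        a_star = max(actions, key=lambda x: Q_A[(s_next, x)])
        Q_A[(s, a)] += alpha * (r + gamma * Q_B[(s_next, a_star)] - Q_A[(s, a)])
    else:
        a_star = max(actions, key=lambda x: Q_B[(s_next, x)])
        Q_B[(s, a)] += alpha * (r + gamma * Q_A[(s_next, a_star)] - Q_B[(s, a)])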
12
-
13
- Another essential aspect of RL research is the incorporation of deep learning techniques, giving rise to the field of Deep Reinforcement Learning (DRL) \cite{1709.05067}. DRL has demonstrated remarkable success in various domains, such as playing video games directly from pixels and learning control policies for robots \cite{1708.05866}. However, DRL algorithms often require a large amount of data and computational resources, which limits their applicability in real-world scenarios \cite{1906.10025}. To overcome these limitations, researchers have proposed various approaches, including distributed DRL \cite{2212.00253} and expert-guided DRL \cite{2106.14642}, which aim to improve the sample efficiency and scalability of DRL algorithms.
14
-
15
- Related work in the field of RL has also focused on the development of policy gradient methods, which optimize the policy directly by following the gradient of the expected return \cite{1811.09013}. These methods have been particularly successful in continuous action settings and have led to the development of algorithms such as Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. However, policy gradient methods often require on-policy data, which can be inefficient in terms of sample complexity \cite{1911.04817}.
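A small sketch of the clipped surrogate objective popularised by PPO, one of the policy gradient methods named above (the probability ratios, advantages, and clip range below are assumed example values):

import numpy as np

# Illustrative sketch: PPO's clipped surrogate objective for a batch of
# probability ratios and advantage estimates. Inputs are assumed examples.
def ppo_clip_objective(ratios, advantages, clip_eps=0.2):
    ratios = np.asarray(ratios, dtype=float)
    advantages = np.asarray(advantages, dtype=float)
    unclipped = ratios * advantages
    clipped = np.clip(ratios, 1.0 - clip_eps, 1.0 + clip_eps) * advantages
    return float(np.minimum(unclipped, clipped).mean())

print(ppo_clip_objective([1.1, 0.7], [0.5, -0.2]))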
16
-
17
- In summary, this survey aims to provide a comprehensive overview of the current state of Reinforcement Learning, focusing on the challenges and recent advances in Q-learning, Deep Reinforcement Learning, and policy gradient methods. By examining the key algorithms, techniques, and applications in these areas, we hope to shed light on the current limitations and future research directions in the field of RL.
18
- INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message.
19
-
20
- INFO:utils.gpt_interaction:assistant_message: Reinforcement Learning (RL) has emerged as a significant research area in the field of artificial intelligence, with a wide range of applications in robotics, finance, healthcare, and gaming \cite{2108.11510}. The primary goal of RL is to develop algorithms that allow agents to learn optimal policies through interaction with their environment, maximizing the cumulative reward over time \cite{1708.05866}. Despite the considerable progress made in recent years, RL still faces several challenges, such as the trade-off between exploration and exploitation, the curse of dimensionality, and the need for efficient algorithms that can handle large-scale and complex problems \cite{1906.10025}.
21
-
22
- One of the major breakthroughs in RL has been the development of Q-learning algorithms, which have been proven to converge to the optimal solution \cite{2303.08631}. However, Q-learning is known to suffer from overestimation bias, leading to suboptimal performance and slow convergence in some cases \cite{2106.14642}. To address this issue, researchers have proposed various modifications and extensions to Q-learning, such as Double Q-learning \cite{1511.02377} and Self-correcting Q-learning \cite{2012.01100}, which aim to mitigate the overestimation bias while maintaining convergence guarantees.
23
-
24
- Another essential aspect of RL research is the incorporation of deep learning techniques, giving rise to the field of Deep Reinforcement Learning (DRL) \cite{1709.05067}. DRL has demonstrated remarkable success in various domains, such as playing video games directly from pixels and learning control policies for robots \cite{1708.05866}. However, DRL algorithms often require a large amount of data and computational resources, which limits their applicability in real-world scenarios \cite{1906.10025}. To overcome these limitations, researchers have proposed various approaches, including distributed DRL \cite{2212.00253} and expert-guided DRL \cite{2106.14642}, which aim to improve the sample efficiency and scalability of DRL algorithms.
25
-
26
- Related work in the field of RL has also focused on the development of policy gradient methods, which optimize the policy directly by following the gradient of the expected return \cite{1811.09013}. These methods have been particularly successful in continuous action settings and have led to the development of algorithms such as Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. However, policy gradient methods often require on-policy data, which can be inefficient in terms of sample complexity \cite{1911.04817}.
27
-
28
- In summary, this survey aims to provide a comprehensive overview of the current state of Reinforcement Learning, focusing on the challenges and recent advances in Q-learning, Deep Reinforcement Learning, and policy gradient methods. By examining the key algorithms, techniques, and applications in these areas, we hope to shed light on the current limitations and future research directions in the field of RL.
29
- INFO:root:For generating introduction, 4397 tokens have been used (3781 for prompts; 616 for completion). 4516 tokens have been used in total.
30
- INFO:utils.prompts:Generated prompts for related works: I am writing a machine learning survey about 'Reinforcement Learning'.
31
- You need to write the related works section. Please discuss key publications, methods, and techniques in your research area. Analyze the strengths and weaknesses of existing methods, and present the related works in a logical manner, often chronologically. Consider using a taxonomy or categorization to structure the discussion. Do not use \section{...} or \subsection{...}; use \paragraph{...} instead.
32
- Please read the following references:
33
- {'1512.07669': ' This article presents a short and concise description of stochastic\napproximation algorithms in reinforcement learning of Markov decision\nprocesses. The algorithms can also be used as a suboptimal method for partially\nobserved Markov decision processes.\n', '1511.02377': ' We provide a full characterization of the set of value functions of Markov\ndecision processes.\n', '1512.09075': ' This paper specifies a notation for Markov decision processes.\n', '2008.10426': ' Decisiveness has proven to be an elegant concept for denumerable Markov\nchains: it is general enough to encompass several natural classes of\ndenumerable Markov chains, and is a sufficient condition for simple qualitative\nand approximate quantitative model checking algorithms to exist. In this paper,\nwe explore how to extend the notion of decisiveness to Markov decision\nprocesses. Compared to Markov chains, the extra non-determinism can be resolved\nin an adversarial or cooperative way, yielding two natural notions of\ndecisiveness. We then explore whether these notions yield model checking\nprocedures concerning the infimum and supremum probabilities of reachability\nproperties.\n', '0711.2185': ' For a countable-state Markov decision process we introduce an embedding which\nproduces a finite-state Markov decision process. The finite-state embedded\nprocess has the same optimal cost, and moreover, it has the same dynamics as\nthe original process when restricting to the approximating set. The embedded\nprocess can be used as an approximation which, being finite, is more convenient\nfor computation and implementation.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. 
The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). 
This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. 
Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1708.05866': ' Deep reinforcement learning is poised to revolutionise the field of AI and\nrepresents a step towards building autonomous systems with a higher level\nunderstanding of the visual world. Currently, deep learning is enabling\nreinforcement learning to scale to problems that were previously intractable,\nsuch as learning to play video games directly from pixels. Deep reinforcement\nlearning algorithms are also applied to robotics, allowing control policies for\nrobots to be learned directly from camera inputs in the real world. 
In this\nsurvey, we begin with an introduction to the general field of reinforcement\nlearning, then progress to the main streams of value-based and policy-based\nmethods. Our survey will cover central algorithms in deep reinforcement\nlearning, including the deep $Q$-network, trust region policy optimisation, and\nasynchronous advantage actor-critic. In parallel, we highlight the unique\nadvantages of deep neural networks, focusing on visual understanding via\nreinforcement learning. To conclude, we describe several current areas of\nresearch within the field.\n', '1906.10025': ' Recent advances in Reinforcement Learning, grounded on combining classical\ntheoretical results with Deep Learning paradigm, led to breakthroughs in many\nartificial intelligence tasks and gave birth to Deep Reinforcement Learning\n(DRL) as a field of research. In this work latest DRL algorithms are reviewed\nwith a focus on their theoretical justification, practical limitations and\nobserved empirical properties.\n', '2111.01334': ' Quantifying the structural and functional differences of temporal networks is\na fundamental and challenging problem in the era of big data. This work\nproposes a temporal dissimilarity measure for temporal network comparison based\non the fastest arrival distance distribution and spectral entropy based\nJensen-Shannon divergence. Experimental results on both synthetic and empirical\ntemporal networks show that the proposed measure could discriminate diverse\ntemporal networks with different structures by capturing various topological\nand temporal properties. Moreover, the proposed measure can discern the\nfunctional distinctions and is found effective applications in temporal network\nclassification and spreadability discrimination.\n', '2110.06553': ' Electroencephalography (EEG) is a popular and effective tool for emotion\nrecognition. However, the propagation mechanisms of EEG in the human brain and\nits intrinsic correlation with emotions are still obscure to researchers. This\nwork proposes four variant transformer frameworks~(spatial attention, temporal\nattention, sequential spatial-temporal attention and simultaneous\nspatial-temporal attention) for EEG emotion recognition to explore the\nrelationship between emotion and spatial-temporal EEG features. Specifically,\nspatial attention and temporal attention are to learn the topological structure\ninformation and time-varying EEG characteristics for emotion recognition\nrespectively. Sequential spatial-temporal attention does the spatial attention\nwithin a one-second segment and temporal attention within one sample\nsequentially to explore the influence degree of emotional stimulation on EEG\nsignals of diverse EEG electrodes in the same temporal segment. The\nsimultaneous spatial-temporal attention, whose spatial and temporal attention\nare performed simultaneously, is used to model the relationship between\ndifferent spatial features in different time segments. The experimental results\ndemonstrate that simultaneous spatial-temporal attention leads to the best\nemotion recognition accuracy among the design choices, indicating modeling the\ncorrelation of spatial and temporal features of EEG signals is significant to\nemotion recognition.\n', '2007.04828': ' Links in most real networks often change over time. Such temporality of links\nencodes the ordering and causality of interactions between nodes and has a\nprofound effect on network dynamics and function. 
Empirical evidences have\nshown that the temporal nature of links in many real-world networks is not\nrandom. Nonetheless, it is challenging to predict temporal link patterns while\nconsidering the entanglement between topological and temporal link patterns.\nHere we propose an entropy-rate based framework, based on combined\ntopological-temporal regularities, for quantifying the predictability of any\ntemporal network. We apply our framework on various model networks,\ndemonstrating that it indeed captures the intrinsic topological-temporal\nregularities whereas previous methods considered only temporal aspects. We also\napply our framework on 18 real networks of different types and determine their\npredictability. Interestingly, we find that for most real temporal networks,\ndespite the greater complexity of predictability brought by the increase in\ndimension the combined topological-temporal predictability is higher than the\ntemporal predictability. Our results demonstrate the necessity of incorporating\nboth temporal and topological aspects of networks in order to improve\npredictions of dynamical processes.\n'}
34
- Every time you use information from the references, you need to cite its ID at the end of the sentence; for example, a sentence that uses information from 1905.09788 should end with \cite{1905.09788}. Please avoid citing the same reference more than once in the same paragraph.
35
- Put your response (do not include \section{...}) in the following Python script: with open("related works.tex", "w") as f: f.write(r'''your_response''')
36
- INFO:utils.gpt_interaction:\paragraph{Markov Decision Processes:}
37
- The study of reinforcement learning is fundamentally rooted in the understanding of Markov decision processes (MDPs). A concise description of stochastic approximation algorithms in reinforcement learning of MDPs is provided by \cite{1512.07669}. The work done in \cite{1511.02377} offers a full characterization of the set of value functions of MDPs, while \cite{1512.09075} specifies a notation for MDPs. The concept of decisiveness in denumerable Markov chains has been extended to MDPs in \cite{2008.10426}, exploring the implications of resolving non-determinism in adversarial or cooperative ways. Additionally, \cite{0711.2185} introduces an embedding technique to produce a finite-state MDP from a countable-state MDP, which can be used as an approximation for computational purposes.
38
-
39
- \paragraph{Q-Learning and Variants:}
40
- Q-learning is a widely used reinforcement learning algorithm that converges to the optimal solution \cite{2303.08631}. However, it is known to overestimate values and spend too much time exploring unhelpful states. Double Q-learning, a convergent alternative, mitigates some of these overestimation issues but may lead to slower convergence \cite{2303.08631}. To address the maximization bias in Q-learning, \cite{2012.01100} introduces a self-correcting algorithm that balances the overestimation of conventional Q-learning and the underestimation of Double Q-learning. This self-correcting Q-learning algorithm is shown to be more accurate and achieves faster convergence in certain domains.
41
-
42
- \paragraph{Expert Q-Learning:}
43
- Expert Q-learning is a novel deep reinforcement learning algorithm proposed in \cite{2106.14642}. Inspired by Dueling Q-learning, it incorporates semi-supervised learning into reinforcement learning by splitting Q-values into state values and action advantages. An expert network is designed in addition to the Q-network, which updates each time following the regular offline minibatch update. The algorithm is demonstrated to be more resistant to overestimation bias and achieves more robust performance compared to the baseline Q-learning algorithm.
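A sketch of the generic value/advantage split that Expert Q-learning is described as borrowing from Dueling Q-learning; this shows only the decomposition of Q-values into a state value and mean-centred advantages, not the expert network itself (all numbers are assumed):

import numpy as np

# Illustrative sketch: recombining Q(s, a) from a state value V(s) and
# mean-centred action advantages A(s, a). Values are assumed examples.
def combine_value_and_advantages(state_value, advantages):
    advantages = np.asarray(advantages, dtype=float)
    return state_value + (advantages - advantages.mean())

print(combine_value_and_advantages(1.5, [0.2, -0.1, 0.4]))  # one Q-value per action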
44
-
45
- \paragraph{Policy Gradient Methods:}
46
- Policy gradient methods are widely used for control in reinforcement learning, particularly in continuous action settings. Natural gradients have been extensively studied within the context of natural gradient actor-critic algorithms and deterministic policy gradients \cite{2209.01820}. The work in \cite{1811.09013} presents the first off-policy policy gradient theorem using emphatic weightings and develops a new actor-critic algorithm called Actor Critic with Emphatic weightings (ACE) that approximates the simplified gradients provided by the theorem. This algorithm is shown to outperform previous off-policy policy gradient methods, such as OffPAC and DPG, in finding the optimal solution.
47
-
48
- \paragraph{Deep Reinforcement Learning:}
49
- Deep reinforcement learning (DRL) combines the power of deep learning with reinforcement learning, achieving remarkable success in various domains, such as finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. The field has seen significant advancements in recent years, with central algorithms such as the deep Q-network, trust region policy optimization, and asynchronous advantage actor-critic being developed \cite{1708.05866}. A detailed review of DRL algorithms and their theoretical justifications, practical limitations, and empirical properties can be found in \cite{1906.10025}.
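A sketch of the bootstrapped target used by the deep Q-network mentioned above, computed here from plain arrays standing in for a target network's outputs (the batch contents are assumed):

import numpy as np

# Illustrative sketch: DQN-style targets y = r + gamma * (1 - done) * max_a Q_target(s', a),
# with q_next standing in for a target network's output of shape (batch, n_actions).
def dqn_targets(rewards, q_next, dones, gamma=0.99):
    rewards = np.asarray(rewards, dtype=float)
    dones = np.asarray(dones, dtype=float)
    q_next = np.asarray(q_next, dtype=float)
    return rewards + gamma * (1.0 - dones) * q_next.max(axis=1)

print(dqn_targets([1.0, 0.0], [[0.2, 0.5], [1.0, 0.3]], [0, 1]))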
50
-
51
- \paragraph{Temporal Networks:}
52
- Temporal networks, where links change over time, are essential in understanding the ordering and causality of interactions between nodes in various applications. The work in \cite{2111.01334} proposes a temporal dissimilarity measure for temporal network comparison based on the fastest arrival distance distribution and spectral entropy-based Jensen-Shannon divergence. This measure is shown to effectively discriminate diverse temporal networks with different structures and functional distinctions.
53
-
54
- In conclusion, reinforcement learning has seen significant advancements in recent years, with various algorithms and techniques being developed to address the challenges in the field. From understanding the fundamentals of MDPs to developing advanced DRL algorithms, researchers continue to push the boundaries of what is possible in reinforcement learning and its applications.
55
- INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message.
56
-
57
- INFO:utils.gpt_interaction:assistant_message: \paragraph{Markov Decision Processes:}
58
- The study of reinforcement learning is fundamentally rooted in the understanding of Markov decision processes (MDPs). A concise description of stochastic approximation algorithms in reinforcement learning of MDPs is provided by \cite{1512.07669}. The work done in \cite{1511.02377} offers a full characterization of the set of value functions of MDPs, while \cite{1512.09075} specifies a notation for MDPs. The concept of decisiveness in denumerable Markov chains has been extended to MDPs in \cite{2008.10426}, exploring the implications of resolving non-determinism in adversarial or cooperative ways. Additionally, \cite{0711.2185} introduces an embedding technique to produce a finite-state MDP from a countable-state MDP, which can be used as an approximation for computational purposes.
59
-
60
- \paragraph{Q-Learning and Variants:}
61
- Q-learning is a widely used reinforcement learning algorithm that converges to the optimal solution \cite{2303.08631}. However, it is known to overestimate values and spend too much time exploring unhelpful states. Double Q-learning, a convergent alternative, mitigates some of these overestimation issues but may lead to slower convergence \cite{2303.08631}. To address the maximization bias in Q-learning, \cite{2012.01100} introduces a self-correcting algorithm that balances the overestimation of conventional Q-learning and the underestimation of Double Q-learning. This self-correcting Q-learning algorithm is shown to be more accurate and achieves faster convergence in certain domains.
62
-
63
- \paragraph{Expert Q-Learning:}
64
- Expert Q-learning is a novel deep reinforcement learning algorithm proposed in \cite{2106.14642}. Inspired by Dueling Q-learning, it incorporates semi-supervised learning into reinforcement learning by splitting Q-values into state values and action advantages. An expert network is designed in addition to the Q-network, which updates each time following the regular offline minibatch update. The algorithm is demonstrated to be more resistant to overestimation bias and achieves more robust performance compared to the baseline Q-learning algorithm.
65
-
66
- \paragraph{Policy Gradient Methods:}
67
- Policy gradient methods are widely used for control in reinforcement learning, particularly in continuous action settings. Natural gradients have been extensively studied within the context of natural gradient actor-critic algorithms and deterministic policy gradients \cite{2209.01820}. The work in \cite{1811.09013} presents the first off-policy policy gradient theorem using emphatic weightings and develops a new actor-critic algorithm called Actor Critic with Emphatic weightings (ACE) that approximates the simplified gradients provided by the theorem. This algorithm is shown to outperform previous off-policy policy gradient methods, such as OffPAC and DPG, in finding the optimal solution.
68
-
69
- \paragraph{Deep Reinforcement Learning:}
70
- Deep reinforcement learning (DRL) combines the power of deep learning with reinforcement learning, achieving remarkable success in various domains, such as finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. The field has seen significant advancements in recent years, with central algorithms such as the deep Q-network, trust region policy optimization, and asynchronous advantage actor-critic being developed \cite{1708.05866}. A detailed review of DRL algorithms and their theoretical justifications, practical limitations, and empirical properties can be found in \cite{1906.10025}.
71
-
72
- \paragraph{Temporal Networks:}
73
- Temporal networks, where links change over time, are essential in understanding the ordering and causality of interactions between nodes in various applications. The work in \cite{2111.01334} proposes a temporal dissimilarity measure for temporal network comparison based on the fastest arrival distance distribution and spectral entropy-based Jensen-Shannon divergence. This measure is shown to effectively discriminate diverse temporal networks with different structures and functional distinctions.
74
-
75
- In conclusion, reinforcement learning has seen significant advancements in recent years, with various algorithms and techniques being developed to address the challenges in the field. From understanding the fundamentals of MDPs to developing advanced DRL algorithms, researchers continue to push the boundaries of what is possible in reinforcement learning and its applications.
76
- INFO:root:For generating related works, 4667 tokens have been used (3793 for prompts; 874 for completion). 9183 tokens have been used in total.
77
- INFO:utils.prompts:Generated prompts for backgrounds: I am writing a machine learning survey about 'Reinforcement Learning'.
78
- You need to write the backgrounds section. Please clearly state the problem your research addresses. Explain the foundational theories, concepts, and principles that underpin your research using as many as mathematical formulas or equations (written in LaTeX). Introduce any necessary mathematical notations, equations, or algorithms that are central to your work (written them in LaTeX). Provide a high-level summary of the methodology in this paper to address the problem. Introduce the evaluation metrics this paper will use to assess the performance of models or algorithms. Do not include \section{...} but you can have \subsection{...}.
79
- Please read the following references:
80
- {'1512.07669': ' This article presents a short and concise description of stochastic\napproximation algorithms in reinforcement learning of Markov decision\nprocesses. The algorithms can also be used as a suboptimal method for partially\nobserved Markov decision processes.\n', '1511.02377': ' We provide a full characterization of the set of value functions of Markov\ndecision processes.\n', '1512.09075': ' This paper specifies a notation for Markov decision processes.\n', '2008.10426': ' Decisiveness has proven to be an elegant concept for denumerable Markov\nchains: it is general enough to encompass several natural classes of\ndenumerable Markov chains, and is a sufficient condition for simple qualitative\nand approximate quantitative model checking algorithms to exist. In this paper,\nwe explore how to extend the notion of decisiveness to Markov decision\nprocesses. Compared to Markov chains, the extra non-determinism can be resolved\nin an adversarial or cooperative way, yielding two natural notions of\ndecisiveness. We then explore whether these notions yield model checking\nprocedures concerning the infimum and supremum probabilities of reachability\nproperties.\n', '0711.2185': ' For a countable-state Markov decision process we introduce an embedding which\nproduces a finite-state Markov decision process. The finite-state embedded\nprocess has the same optimal cost, and moreover, it has the same dynamics as\nthe original process when restricting to the approximating set. The embedded\nprocess can be used as an approximation which, being finite, is more convenient\nfor computation and implementation.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. 
The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). 
This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. 
Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1708.05866': ' Deep reinforcement learning is poised to revolutionise the field of AI and\nrepresents a step towards building autonomous systems with a higher level\nunderstanding of the visual world. Currently, deep learning is enabling\nreinforcement learning to scale to problems that were previously intractable,\nsuch as learning to play video games directly from pixels. Deep reinforcement\nlearning algorithms are also applied to robotics, allowing control policies for\nrobots to be learned directly from camera inputs in the real world. 
In this\nsurvey, we begin with an introduction to the general field of reinforcement\nlearning, then progress to the main streams of value-based and policy-based\nmethods. Our survey will cover central algorithms in deep reinforcement\nlearning, including the deep $Q$-network, trust region policy optimisation, and\nasynchronous advantage actor-critic. In parallel, we highlight the unique\nadvantages of deep neural networks, focusing on visual understanding via\nreinforcement learning. To conclude, we describe several current areas of\nresearch within the field.\n', '1906.10025': ' Recent advances in Reinforcement Learning, grounded on combining classical\ntheoretical results with Deep Learning paradigm, led to breakthroughs in many\nartificial intelligence tasks and gave birth to Deep Reinforcement Learning\n(DRL) as a field of research. In this work latest DRL algorithms are reviewed\nwith a focus on their theoretical justification, practical limitations and\nobserved empirical properties.\n', '2111.01334': ' Quantifying the structural and functional differences of temporal networks is\na fundamental and challenging problem in the era of big data. This work\nproposes a temporal dissimilarity measure for temporal network comparison based\non the fastest arrival distance distribution and spectral entropy based\nJensen-Shannon divergence. Experimental results on both synthetic and empirical\ntemporal networks show that the proposed measure could discriminate diverse\ntemporal networks with different structures by capturing various topological\nand temporal properties. Moreover, the proposed measure can discern the\nfunctional distinctions and is found effective applications in temporal network\nclassification and spreadability discrimination.\n', '2110.06553': ' Electroencephalography (EEG) is a popular and effective tool for emotion\nrecognition. However, the propagation mechanisms of EEG in the human brain and\nits intrinsic correlation with emotions are still obscure to researchers. This\nwork proposes four variant transformer frameworks~(spatial attention, temporal\nattention, sequential spatial-temporal attention and simultaneous\nspatial-temporal attention) for EEG emotion recognition to explore the\nrelationship between emotion and spatial-temporal EEG features. Specifically,\nspatial attention and temporal attention are to learn the topological structure\ninformation and time-varying EEG characteristics for emotion recognition\nrespectively. Sequential spatial-temporal attention does the spatial attention\nwithin a one-second segment and temporal attention within one sample\nsequentially to explore the influence degree of emotional stimulation on EEG\nsignals of diverse EEG electrodes in the same temporal segment. The\nsimultaneous spatial-temporal attention, whose spatial and temporal attention\nare performed simultaneously, is used to model the relationship between\ndifferent spatial features in different time segments. The experimental results\ndemonstrate that simultaneous spatial-temporal attention leads to the best\nemotion recognition accuracy among the design choices, indicating modeling the\ncorrelation of spatial and temporal features of EEG signals is significant to\nemotion recognition.\n', '2007.04828': ' Links in most real networks often change over time. Such temporality of links\nencodes the ordering and causality of interactions between nodes and has a\nprofound effect on network dynamics and function. 
Empirical evidences have\nshown that the temporal nature of links in many real-world networks is not\nrandom. Nonetheless, it is challenging to predict temporal link patterns while\nconsidering the entanglement between topological and temporal link patterns.\nHere we propose an entropy-rate based framework, based on combined\ntopological-temporal regularities, for quantifying the predictability of any\ntemporal network. We apply our framework on various model networks,\ndemonstrating that it indeed captures the intrinsic topological-temporal\nregularities whereas previous methods considered only temporal aspects. We also\napply our framework on 18 real networks of different types and determine their\npredictability. Interestingly, we find that for most real temporal networks,\ndespite the greater complexity of predictability brought by the increase in\ndimension the combined topological-temporal predictability is higher than the\ntemporal predictability. Our results demonstrate the necessity of incorporating\nboth temporal and topological aspects of networks in order to improve\npredictions of dynamical processes.\n'}
81
- Every time you use information from the references, you need to cite its ID at the end of the sentence; for example, a sentence that uses information from 1905.09788 should end with \cite{1905.09788}. Please avoid citing the same reference more than once in the same paragraph.
82
- Put your response (do not include \section{...}) in the following Python script: with open("backgrounds.tex", "w") as f: f.write(r'''your_response''')
83
- INFO:utils.gpt_interaction:\subsection{Problem Statement and Foundational Concepts}
84
-
85
- Reinforcement Learning (RL) is a subfield of machine learning that focuses on training agents to make decisions in an environment to maximize a cumulative reward signal. In RL, an agent interacts with an environment through a sequence of actions, observations, and rewards, aiming to learn an optimal policy that maps states to actions \cite{1512.09075}. The problem can be formalized as a Markov Decision Process (MDP), which is defined by a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{1511.02377}. The goal of RL is to find a policy $\pi(a|s)$ that maximizes the expected cumulative reward, defined as $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $R_{t+k+1}$ is the reward received at time step $t+k+1$ \cite{1512.07669}.
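A short sketch computing the discounted return $G_t$ defined above for a finite reward sequence (the infinite sum truncated at episode termination; the rewards and discount factor are assumed examples):

# Illustrative sketch: discounted return G_t for a finite reward sequence,
# accumulated backwards from the end of the episode.
def discounted_return(rewards, gamma=0.99):
    g = 0.0
    for r in reversed(rewards):
        g = r + gamma * g
    return g

print(discounted_return([1.0, 0.0, 2.0], gamma=0.9))  # 1.0 + 0.9*0.0 + 0.81*2.0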
86
-
87
- \subsection{Q-Learning and Related Algorithms}
88
-
89
- Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected cumulative reward of taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by:
90
-
91
- \[Q(s, a) \leftarrow Q(s, a) + \alpha \left[ R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a) \right],\]
92
-
93
- where $\alpha$ is the learning rate, $R(s, a)$ is the reward for taking action $a$ in state $s$, and $s'$ is the next state \cite{2303.08631}. However, Q-learning can suffer from overestimation bias, which can lead to suboptimal performance \cite{2106.14642}. To address this issue, Double Q-learning was proposed, which uses two separate Q-value estimators and updates them alternately, mitigating overestimation bias while maintaining convergence guarantees \cite{2303.08631}. Another variant, Expert Q-learning, incorporates semi-supervised learning by splitting Q-values into state values and action advantages, and using an expert network to assess the value of states \cite{2106.14642}.
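A tabular sketch of the Q-learning update rule displayed above (the state/action encoding and hyperparameters are assumed):

from collections import defaultdict

# Illustrative sketch: one tabular Q-learning step matching the update rule above.
Q = defaultdict(float)
alpha, gamma = 0.1, 0.99

def q_learning_update(s, a, r, s_next, actions):
    td_target = r + gamma * max(Q[(s_next, a_next)] for a_next in actions)
    Q[(s, a)] += alpha * (td_target - Q[(s, a)])

q_learning_update("s0", "a1", 1.0, "s1", actions=["a0", "a1"])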
94
-
95
- \subsection{Policy Gradient Methods}
96
-
97
- Policy gradient methods are another class of RL algorithms that optimize the policy directly by estimating the gradient of the expected cumulative reward with respect to the policy parameters \cite{1703.02102}. The policy gradient theorem provides a simplified form for the gradient, which can be used to derive on-policy and off-policy algorithms \cite{1811.09013}. Natural policy gradients, which incorporate second-order information to improve convergence, form the foundation for state-of-the-art algorithms like Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}.
98
-
99
- \subsection{Methodology and Evaluation Metrics}
100
-
101
- In this paper, we will explore various RL algorithms, focusing on Q-learning and its variants, as well as policy gradient methods. We will delve into their theoretical foundations, convergence properties, and practical limitations. To assess the performance of these algorithms, we will use evaluation metrics such as cumulative reward, convergence speed, and sample efficiency. By comparing the performance of different algorithms, we aim to provide insights into their strengths and weaknesses, and identify potential areas for improvement and future research directions.
102
- INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message.
103
-
104
- INFO:utils.gpt_interaction:assistant_message: \subsection{Problem Statement and Foundational Concepts}
105
-
106
- Reinforcement Learning (RL) is a subfield of machine learning that focuses on training agents to make decisions in an environment to maximize a cumulative reward signal. In RL, an agent interacts with an environment through a sequence of actions, observations, and rewards, aiming to learn an optimal policy that maps states to actions \cite{1512.09075}. The problem can be formalized as a Markov Decision Process (MDP), which is defined by a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{1511.02377}. The goal of RL is to find a policy $\pi(a|s)$ that maximizes the expected cumulative reward, defined as $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $R_{t+k+1}$ is the reward received at time step $t+k+1$ \cite{1512.07669}.
107
-
108
- \subsection{Q-Learning and Related Algorithms}
109
-
110
- Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected cumulative reward of taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by:
111
-
112
- \[Q(s, a) \leftarrow Q(s, a) + \alpha \left[ R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a) \right],\]
113
-
114
- where $\alpha$ is the learning rate, $R(s, a)$ is the reward for taking action $a$ in state $s$, and $s'$ is the next state \cite{2303.08631}. However, Q-learning can suffer from overestimation bias, which can lead to suboptimal performance \cite{2106.14642}. To address this issue, Double Q-learning was proposed, which uses two separate Q-value estimators and updates them alternately, mitigating overestimation bias while maintaining convergence guarantees \cite{2303.08631}. Another variant, Expert Q-learning, incorporates semi-supervised learning by splitting Q-values into state values and action advantages, and using an expert network to assess the value of states \cite{2106.14642}.
115
-
116
- \subsection{Policy Gradient Methods}
117
-
118
- Policy gradient methods are another class of RL algorithms that optimize the policy directly by estimating the gradient of the expected cumulative reward with respect to the policy parameters \cite{1703.02102}. The policy gradient theorem provides a simplified form for the gradient, which can be used to derive on-policy and off-policy algorithms \cite{1811.09013}. Natural policy gradients, which incorporate second-order information to improve convergence, form the foundation for state-of-the-art algorithms like Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}.
119
-
120
- \subsection{Methodology and Evaluation Metrics}
121
-
122
- In this paper, we will explore various RL algorithms, focusing on Q-learning and its variants, as well as policy gradient methods. We will delve into their theoretical foundations, convergence properties, and practical limitations. To assess the performance of these algorithms, we will use evaluation metrics such as cumulative reward, convergence speed, and sample efficiency. By comparing the performance of different algorithms, we aim to provide insights into their strengths and weaknesses, and identify potential areas for improvement and future research directions.
123
- INFO:root:For generating backgrounds, 4606 tokens have been used (3831 for prompts; 775 for completion). 13789 tokens have been used in total.
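The Q-learning update rule quoted in the generated backgrounds above is a plain tabular temporal-difference step. A minimal sketch of that update in Python, assuming a discrete state/action space and hypothetical names (q_table, alpha, gamma) that are not part of this repository:

    import numpy as np

    def q_learning_update(q_table, s, a, r, s_next, alpha=0.1, gamma=0.99):
        # Q(s, a) <- Q(s, a) + alpha * [ r + gamma * max_a' Q(s', a') - Q(s, a) ]
        td_target = r + gamma * np.max(q_table[s_next])  # bootstrap from the greedy next-state value
        td_error = td_target - q_table[s, a]             # temporal-difference error
        q_table[s, a] += alpha * td_error                # move the estimate toward the target
        return q_table

    # Example: 5 states, 2 actions, one observed transition (s=0, a=1, r=1.0, s'=3)
    q = np.zeros((5, 2))
    q = q_learning_update(q, s=0, a=1, r=1.0, s_next=3)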
outputs/outputs_20230421_000752/iclr2022_conference.bst DELETED
@@ -1,1440 +0,0 @@
1
- %% File: `iclr2017.bst'
2
- %% A copy of iclm2010.bst, which is a modification of `plainnl.bst' for use with natbib package
3
- %%
4
- %% Copyright 2010 Hal Daum\'e III
5
- %% Modified by J. Fürnkranz
6
- %% - Changed labels from (X and Y, 2000) to (X & Y, 2000)
7
- %%
8
- %% Copyright 1993-2007 Patrick W Daly
9
- %% Max-Planck-Institut f\"ur Sonnensystemforschung
10
- %% Max-Planck-Str. 2
11
- %% D-37191 Katlenburg-Lindau
12
- %% Germany
13
- %% E-mail: daly@mps.mpg.de
14
- %%
15
- %% This program can be redistributed and/or modified under the terms
16
- %% of the LaTeX Project Public License Distributed from CTAN
17
- %% archives in directory macros/latex/base/lppl.txt; either
18
- %% version 1 of the License, or any later version.
19
- %%
20
- % Version and source file information:
21
- % \ProvidesFile{icml2010.mbs}[2007/11/26 1.93 (PWD)]
22
- %
23
- % BibTeX `plainnat' family
24
- % version 0.99b for BibTeX versions 0.99a or later,
25
- % for LaTeX versions 2.09 and 2e.
26
- %
27
- % For use with the `natbib.sty' package; emulates the corresponding
28
- % member of the `plain' family, but with author-year citations.
29
- %
30
- % With version 6.0 of `natbib.sty', it may also be used for numerical
31
- % citations, while retaining the commands \citeauthor, \citefullauthor,
32
- % and \citeyear to print the corresponding information.
33
- %
34
- % For version 7.0 of `natbib.sty', the KEY field replaces missing
35
- % authors/editors, and the date is left blank in \bibitem.
36
- %
37
- % Includes field EID for the sequence/citation number of electronic journals
38
- % which is used instead of page numbers.
39
- %
40
- % Includes fields ISBN and ISSN.
41
- %
42
- % Includes field URL for Internet addresses.
43
- %
44
- % Includes field DOI for Digital Object Idenfifiers.
45
- %
46
- % Works best with the url.sty package of Donald Arseneau.
47
- %
48
- % Works with identical authors and year are further sorted by
49
- % citation key, to preserve any natural sequence.
50
- %
51
- ENTRY
52
- { address
53
- author
54
- booktitle
55
- chapter
56
- doi
57
- eid
58
- edition
59
- editor
60
- howpublished
61
- institution
62
- isbn
63
- issn
64
- journal
65
- key
66
- month
67
- note
68
- number
69
- organization
70
- pages
71
- publisher
72
- school
73
- series
74
- title
75
- type
76
- url
77
- volume
78
- year
79
- }
80
- {}
81
- { label extra.label sort.label short.list }
82
-
83
- INTEGERS { output.state before.all mid.sentence after.sentence after.block }
84
-
85
- FUNCTION {init.state.consts}
86
- { #0 'before.all :=
87
- #1 'mid.sentence :=
88
- #2 'after.sentence :=
89
- #3 'after.block :=
90
- }
91
-
92
- STRINGS { s t }
93
-
94
- FUNCTION {output.nonnull}
95
- { 's :=
96
- output.state mid.sentence =
97
- { ", " * write$ }
98
- { output.state after.block =
99
- { add.period$ write$
100
- newline$
101
- "\newblock " write$
102
- }
103
- { output.state before.all =
104
- 'write$
105
- { add.period$ " " * write$ }
106
- if$
107
- }
108
- if$
109
- mid.sentence 'output.state :=
110
- }
111
- if$
112
- s
113
- }
114
-
115
- FUNCTION {output}
116
- { duplicate$ empty$
117
- 'pop$
118
- 'output.nonnull
119
- if$
120
- }
121
-
122
- FUNCTION {output.check}
123
- { 't :=
124
- duplicate$ empty$
125
- { pop$ "empty " t * " in " * cite$ * warning$ }
126
- 'output.nonnull
127
- if$
128
- }
129
-
130
- FUNCTION {fin.entry}
131
- { add.period$
132
- write$
133
- newline$
134
- }
135
-
136
- FUNCTION {new.block}
137
- { output.state before.all =
138
- 'skip$
139
- { after.block 'output.state := }
140
- if$
141
- }
142
-
143
- FUNCTION {new.sentence}
144
- { output.state after.block =
145
- 'skip$
146
- { output.state before.all =
147
- 'skip$
148
- { after.sentence 'output.state := }
149
- if$
150
- }
151
- if$
152
- }
153
-
154
- FUNCTION {not}
155
- { { #0 }
156
- { #1 }
157
- if$
158
- }
159
-
160
- FUNCTION {and}
161
- { 'skip$
162
- { pop$ #0 }
163
- if$
164
- }
165
-
166
- FUNCTION {or}
167
- { { pop$ #1 }
168
- 'skip$
169
- if$
170
- }
171
-
172
- FUNCTION {new.block.checka}
173
- { empty$
174
- 'skip$
175
- 'new.block
176
- if$
177
- }
178
-
179
- FUNCTION {new.block.checkb}
180
- { empty$
181
- swap$ empty$
182
- and
183
- 'skip$
184
- 'new.block
185
- if$
186
- }
187
-
188
- FUNCTION {new.sentence.checka}
189
- { empty$
190
- 'skip$
191
- 'new.sentence
192
- if$
193
- }
194
-
195
- FUNCTION {new.sentence.checkb}
196
- { empty$
197
- swap$ empty$
198
- and
199
- 'skip$
200
- 'new.sentence
201
- if$
202
- }
203
-
204
- FUNCTION {field.or.null}
205
- { duplicate$ empty$
206
- { pop$ "" }
207
- 'skip$
208
- if$
209
- }
210
-
211
- FUNCTION {emphasize}
212
- { duplicate$ empty$
213
- { pop$ "" }
214
- { "\emph{" swap$ * "}" * }
215
- if$
216
- }
217
-
218
- INTEGERS { nameptr namesleft numnames }
219
-
220
- FUNCTION {format.names}
221
- { 's :=
222
- #1 'nameptr :=
223
- s num.names$ 'numnames :=
224
- numnames 'namesleft :=
225
- { namesleft #0 > }
226
- { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't :=
227
- nameptr #1 >
228
- { namesleft #1 >
229
- { ", " * t * }
230
- { numnames #2 >
231
- { "," * }
232
- 'skip$
233
- if$
234
- t "others" =
235
- { " et~al." * }
236
- { " and " * t * }
237
- if$
238
- }
239
- if$
240
- }
241
- 't
242
- if$
243
- nameptr #1 + 'nameptr :=
244
- namesleft #1 - 'namesleft :=
245
- }
246
- while$
247
- }
248
-
249
- FUNCTION {format.key}
250
- { empty$
251
- { key field.or.null }
252
- { "" }
253
- if$
254
- }
255
-
256
- FUNCTION {format.authors}
257
- { author empty$
258
- { "" }
259
- { author format.names }
260
- if$
261
- }
262
-
263
- FUNCTION {format.editors}
264
- { editor empty$
265
- { "" }
266
- { editor format.names
267
- editor num.names$ #1 >
268
- { " (eds.)" * }
269
- { " (ed.)" * }
270
- if$
271
- }
272
- if$
273
- }
274
-
275
- FUNCTION {format.isbn}
276
- { isbn empty$
277
- { "" }
278
- { new.block "ISBN " isbn * }
279
- if$
280
- }
281
-
282
- FUNCTION {format.issn}
283
- { issn empty$
284
- { "" }
285
- { new.block "ISSN " issn * }
286
- if$
287
- }
288
-
289
- FUNCTION {format.url}
290
- { url empty$
291
- { "" }
292
- { new.block "URL \url{" url * "}" * }
293
- if$
294
- }
295
-
296
- FUNCTION {format.doi}
297
- { doi empty$
298
- { "" }
299
- { new.block "\doi{" doi * "}" * }
300
- if$
301
- }
302
-
303
- FUNCTION {format.title}
304
- { title empty$
305
- { "" }
306
- { title "t" change.case$ }
307
- if$
308
- }
309
-
310
- FUNCTION {format.full.names}
311
- {'s :=
312
- #1 'nameptr :=
313
- s num.names$ 'numnames :=
314
- numnames 'namesleft :=
315
- { namesleft #0 > }
316
- { s nameptr
317
- "{vv~}{ll}" format.name$ 't :=
318
- nameptr #1 >
319
- {
320
- namesleft #1 >
321
- { ", " * t * }
322
- {
323
- numnames #2 >
324
- { "," * }
325
- 'skip$
326
- if$
327
- t "others" =
328
- { " et~al." * }
329
- { " and " * t * }
330
- if$
331
- }
332
- if$
333
- }
334
- 't
335
- if$
336
- nameptr #1 + 'nameptr :=
337
- namesleft #1 - 'namesleft :=
338
- }
339
- while$
340
- }
341
-
342
- FUNCTION {author.editor.full}
343
- { author empty$
344
- { editor empty$
345
- { "" }
346
- { editor format.full.names }
347
- if$
348
- }
349
- { author format.full.names }
350
- if$
351
- }
352
-
353
- FUNCTION {author.full}
354
- { author empty$
355
- { "" }
356
- { author format.full.names }
357
- if$
358
- }
359
-
360
- FUNCTION {editor.full}
361
- { editor empty$
362
- { "" }
363
- { editor format.full.names }
364
- if$
365
- }
366
-
367
- FUNCTION {make.full.names}
368
- { type$ "book" =
369
- type$ "inbook" =
370
- or
371
- 'author.editor.full
372
- { type$ "proceedings" =
373
- 'editor.full
374
- 'author.full
375
- if$
376
- }
377
- if$
378
- }
379
-
380
- FUNCTION {output.bibitem}
381
- { newline$
382
- "\bibitem[" write$
383
- label write$
384
- ")" make.full.names duplicate$ short.list =
385
- { pop$ }
386
- { * }
387
- if$
388
- "]{" * write$
389
- cite$ write$
390
- "}" write$
391
- newline$
392
- ""
393
- before.all 'output.state :=
394
- }
395
-
396
- FUNCTION {n.dashify}
397
- { 't :=
398
- ""
399
- { t empty$ not }
400
- { t #1 #1 substring$ "-" =
401
- { t #1 #2 substring$ "--" = not
402
- { "--" *
403
- t #2 global.max$ substring$ 't :=
404
- }
405
- { { t #1 #1 substring$ "-" = }
406
- { "-" *
407
- t #2 global.max$ substring$ 't :=
408
- }
409
- while$
410
- }
411
- if$
412
- }
413
- { t #1 #1 substring$ *
414
- t #2 global.max$ substring$ 't :=
415
- }
416
- if$
417
- }
418
- while$
419
- }
420
-
421
- FUNCTION {format.date}
422
- { year duplicate$ empty$
423
- { "empty year in " cite$ * warning$
424
- pop$ "" }
425
- 'skip$
426
- if$
427
- month empty$
428
- 'skip$
429
- { month
430
- " " * swap$ *
431
- }
432
- if$
433
- extra.label *
434
- }
435
-
436
- FUNCTION {format.btitle}
437
- { title emphasize
438
- }
439
-
440
- FUNCTION {tie.or.space.connect}
441
- { duplicate$ text.length$ #3 <
442
- { "~" }
443
- { " " }
444
- if$
445
- swap$ * *
446
- }
447
-
448
- FUNCTION {either.or.check}
449
- { empty$
450
- 'pop$
451
- { "can't use both " swap$ * " fields in " * cite$ * warning$ }
452
- if$
453
- }
454
-
455
- FUNCTION {format.bvolume}
456
- { volume empty$
457
- { "" }
458
- { "volume" volume tie.or.space.connect
459
- series empty$
460
- 'skip$
461
- { " of " * series emphasize * }
462
- if$
463
- "volume and number" number either.or.check
464
- }
465
- if$
466
- }
467
-
468
- FUNCTION {format.number.series}
469
- { volume empty$
470
- { number empty$
471
- { series field.or.null }
472
- { output.state mid.sentence =
473
- { "number" }
474
- { "Number" }
475
- if$
476
- number tie.or.space.connect
477
- series empty$
478
- { "there's a number but no series in " cite$ * warning$ }
479
- { " in " * series * }
480
- if$
481
- }
482
- if$
483
- }
484
- { "" }
485
- if$
486
- }
487
-
488
- FUNCTION {format.edition}
489
- { edition empty$
490
- { "" }
491
- { output.state mid.sentence =
492
- { edition "l" change.case$ " edition" * }
493
- { edition "t" change.case$ " edition" * }
494
- if$
495
- }
496
- if$
497
- }
498
-
499
- INTEGERS { multiresult }
500
-
501
- FUNCTION {multi.page.check}
502
- { 't :=
503
- #0 'multiresult :=
504
- { multiresult not
505
- t empty$ not
506
- and
507
- }
508
- { t #1 #1 substring$
509
- duplicate$ "-" =
510
- swap$ duplicate$ "," =
511
- swap$ "+" =
512
- or or
513
- { #1 'multiresult := }
514
- { t #2 global.max$ substring$ 't := }
515
- if$
516
- }
517
- while$
518
- multiresult
519
- }
520
-
521
- FUNCTION {format.pages}
522
- { pages empty$
523
- { "" }
524
- { pages multi.page.check
525
- { "pp.\ " pages n.dashify tie.or.space.connect }
526
- { "pp.\ " pages tie.or.space.connect }
527
- if$
528
- }
529
- if$
530
- }
531
-
532
- FUNCTION {format.eid}
533
- { eid empty$
534
- { "" }
535
- { "art." eid tie.or.space.connect }
536
- if$
537
- }
538
-
539
- FUNCTION {format.vol.num.pages}
540
- { volume field.or.null
541
- number empty$
542
- 'skip$
543
- { "\penalty0 (" number * ")" * *
544
- volume empty$
545
- { "there's a number but no volume in " cite$ * warning$ }
546
- 'skip$
547
- if$
548
- }
549
- if$
550
- pages empty$
551
- 'skip$
552
- { duplicate$ empty$
553
- { pop$ format.pages }
554
- { ":\penalty0 " * pages n.dashify * }
555
- if$
556
- }
557
- if$
558
- }
559
-
560
- FUNCTION {format.vol.num.eid}
561
- { volume field.or.null
562
- number empty$
563
- 'skip$
564
- { "\penalty0 (" number * ")" * *
565
- volume empty$
566
- { "there's a number but no volume in " cite$ * warning$ }
567
- 'skip$
568
- if$
569
- }
570
- if$
571
- eid empty$
572
- 'skip$
573
- { duplicate$ empty$
574
- { pop$ format.eid }
575
- { ":\penalty0 " * eid * }
576
- if$
577
- }
578
- if$
579
- }
580
-
581
- FUNCTION {format.chapter.pages}
582
- { chapter empty$
583
- 'format.pages
584
- { type empty$
585
- { "chapter" }
586
- { type "l" change.case$ }
587
- if$
588
- chapter tie.or.space.connect
589
- pages empty$
590
- 'skip$
591
- { ", " * format.pages * }
592
- if$
593
- }
594
- if$
595
- }
596
-
597
- FUNCTION {format.in.ed.booktitle}
598
- { booktitle empty$
599
- { "" }
600
- { editor empty$
601
- { "In " booktitle emphasize * }
602
- { "In " format.editors * ", " * booktitle emphasize * }
603
- if$
604
- }
605
- if$
606
- }
607
-
608
- FUNCTION {empty.misc.check}
609
- { author empty$ title empty$ howpublished empty$
610
- month empty$ year empty$ note empty$
611
- and and and and and
612
- key empty$ not and
613
- { "all relevant fields are empty in " cite$ * warning$ }
614
- 'skip$
615
- if$
616
- }
617
-
618
- FUNCTION {format.thesis.type}
619
- { type empty$
620
- 'skip$
621
- { pop$
622
- type "t" change.case$
623
- }
624
- if$
625
- }
626
-
627
- FUNCTION {format.tr.number}
628
- { type empty$
629
- { "Technical Report" }
630
- 'type
631
- if$
632
- number empty$
633
- { "t" change.case$ }
634
- { number tie.or.space.connect }
635
- if$
636
- }
637
-
638
- FUNCTION {format.article.crossref}
639
- { key empty$
640
- { journal empty$
641
- { "need key or journal for " cite$ * " to crossref " * crossref *
642
- warning$
643
- ""
644
- }
645
- { "In \emph{" journal * "}" * }
646
- if$
647
- }
648
- { "In " }
649
- if$
650
- " \citet{" * crossref * "}" *
651
- }
652
-
653
- FUNCTION {format.book.crossref}
654
- { volume empty$
655
- { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
656
- "In "
657
- }
658
- { "Volume" volume tie.or.space.connect
659
- " of " *
660
- }
661
- if$
662
- editor empty$
663
- editor field.or.null author field.or.null =
664
- or
665
- { key empty$
666
- { series empty$
667
- { "need editor, key, or series for " cite$ * " to crossref " *
668
- crossref * warning$
669
- "" *
670
- }
671
- { "\emph{" * series * "}" * }
672
- if$
673
- }
674
- 'skip$
675
- if$
676
- }
677
- 'skip$
678
- if$
679
- " \citet{" * crossref * "}" *
680
- }
681
-
682
- FUNCTION {format.incoll.inproc.crossref}
683
- { editor empty$
684
- editor field.or.null author field.or.null =
685
- or
686
- { key empty$
687
- { booktitle empty$
688
- { "need editor, key, or booktitle for " cite$ * " to crossref " *
689
- crossref * warning$
690
- ""
691
- }
692
- { "In \emph{" booktitle * "}" * }
693
- if$
694
- }
695
- { "In " }
696
- if$
697
- }
698
- { "In " }
699
- if$
700
- " \citet{" * crossref * "}" *
701
- }
702
-
703
- FUNCTION {article}
704
- { output.bibitem
705
- format.authors "author" output.check
706
- author format.key output
707
- new.block
708
- format.title "title" output.check
709
- new.block
710
- crossref missing$
711
- { journal emphasize "journal" output.check
712
- eid empty$
713
- { format.vol.num.pages output }
714
- { format.vol.num.eid output }
715
- if$
716
- format.date "year" output.check
717
- }
718
- { format.article.crossref output.nonnull
719
- eid empty$
720
- { format.pages output }
721
- { format.eid output }
722
- if$
723
- }
724
- if$
725
- format.issn output
726
- format.doi output
727
- format.url output
728
- new.block
729
- note output
730
- fin.entry
731
- }
732
-
733
- FUNCTION {book}
734
- { output.bibitem
735
- author empty$
736
- { format.editors "author and editor" output.check
737
- editor format.key output
738
- }
739
- { format.authors output.nonnull
740
- crossref missing$
741
- { "author and editor" editor either.or.check }
742
- 'skip$
743
- if$
744
- }
745
- if$
746
- new.block
747
- format.btitle "title" output.check
748
- crossref missing$
749
- { format.bvolume output
750
- new.block
751
- format.number.series output
752
- new.sentence
753
- publisher "publisher" output.check
754
- address output
755
- }
756
- { new.block
757
- format.book.crossref output.nonnull
758
- }
759
- if$
760
- format.edition output
761
- format.date "year" output.check
762
- format.isbn output
763
- format.doi output
764
- format.url output
765
- new.block
766
- note output
767
- fin.entry
768
- }
769
-
770
- FUNCTION {booklet}
771
- { output.bibitem
772
- format.authors output
773
- author format.key output
774
- new.block
775
- format.title "title" output.check
776
- howpublished address new.block.checkb
777
- howpublished output
778
- address output
779
- format.date output
780
- format.isbn output
781
- format.doi output
782
- format.url output
783
- new.block
784
- note output
785
- fin.entry
786
- }
787
-
788
- FUNCTION {inbook}
789
- { output.bibitem
790
- author empty$
791
- { format.editors "author and editor" output.check
792
- editor format.key output
793
- }
794
- { format.authors output.nonnull
795
- crossref missing$
796
- { "author and editor" editor either.or.check }
797
- 'skip$
798
- if$
799
- }
800
- if$
801
- new.block
802
- format.btitle "title" output.check
803
- crossref missing$
804
- { format.bvolume output
805
- format.chapter.pages "chapter and pages" output.check
806
- new.block
807
- format.number.series output
808
- new.sentence
809
- publisher "publisher" output.check
810
- address output
811
- }
812
- { format.chapter.pages "chapter and pages" output.check
813
- new.block
814
- format.book.crossref output.nonnull
815
- }
816
- if$
817
- format.edition output
818
- format.date "year" output.check
819
- format.isbn output
820
- format.doi output
821
- format.url output
822
- new.block
823
- note output
824
- fin.entry
825
- }
826
-
827
- FUNCTION {incollection}
828
- { output.bibitem
829
- format.authors "author" output.check
830
- author format.key output
831
- new.block
832
- format.title "title" output.check
833
- new.block
834
- crossref missing$
835
- { format.in.ed.booktitle "booktitle" output.check
836
- format.bvolume output
837
- format.number.series output
838
- format.chapter.pages output
839
- new.sentence
840
- publisher "publisher" output.check
841
- address output
842
- format.edition output
843
- format.date "year" output.check
844
- }
845
- { format.incoll.inproc.crossref output.nonnull
846
- format.chapter.pages output
847
- }
848
- if$
849
- format.isbn output
850
- format.doi output
851
- format.url output
852
- new.block
853
- note output
854
- fin.entry
855
- }
856
-
857
- FUNCTION {inproceedings}
858
- { output.bibitem
859
- format.authors "author" output.check
860
- author format.key output
861
- new.block
862
- format.title "title" output.check
863
- new.block
864
- crossref missing$
865
- { format.in.ed.booktitle "booktitle" output.check
866
- format.bvolume output
867
- format.number.series output
868
- format.pages output
869
- address empty$
870
- { organization publisher new.sentence.checkb
871
- organization output
872
- publisher output
873
- format.date "year" output.check
874
- }
875
- { address output.nonnull
876
- format.date "year" output.check
877
- new.sentence
878
- organization output
879
- publisher output
880
- }
881
- if$
882
- }
883
- { format.incoll.inproc.crossref output.nonnull
884
- format.pages output
885
- }
886
- if$
887
- format.isbn output
888
- format.doi output
889
- format.url output
890
- new.block
891
- note output
892
- fin.entry
893
- }
894
-
895
- FUNCTION {conference} { inproceedings }
896
-
897
- FUNCTION {manual}
898
- { output.bibitem
899
- format.authors output
900
- author format.key output
901
- new.block
902
- format.btitle "title" output.check
903
- organization address new.block.checkb
904
- organization output
905
- address output
906
- format.edition output
907
- format.date output
908
- format.url output
909
- new.block
910
- note output
911
- fin.entry
912
- }
913
-
914
- FUNCTION {mastersthesis}
915
- { output.bibitem
916
- format.authors "author" output.check
917
- author format.key output
918
- new.block
919
- format.title "title" output.check
920
- new.block
921
- "Master's thesis" format.thesis.type output.nonnull
922
- school "school" output.check
923
- address output
924
- format.date "year" output.check
925
- format.url output
926
- new.block
927
- note output
928
- fin.entry
929
- }
930
-
931
- FUNCTION {misc}
932
- { output.bibitem
933
- format.authors output
934
- author format.key output
935
- title howpublished new.block.checkb
936
- format.title output
937
- howpublished new.block.checka
938
- howpublished output
939
- format.date output
940
- format.issn output
941
- format.url output
942
- new.block
943
- note output
944
- fin.entry
945
- empty.misc.check
946
- }
947
-
948
- FUNCTION {phdthesis}
949
- { output.bibitem
950
- format.authors "author" output.check
951
- author format.key output
952
- new.block
953
- format.btitle "title" output.check
954
- new.block
955
- "PhD thesis" format.thesis.type output.nonnull
956
- school "school" output.check
957
- address output
958
- format.date "year" output.check
959
- format.url output
960
- new.block
961
- note output
962
- fin.entry
963
- }
964
-
965
- FUNCTION {proceedings}
966
- { output.bibitem
967
- format.editors output
968
- editor format.key output
969
- new.block
970
- format.btitle "title" output.check
971
- format.bvolume output
972
- format.number.series output
973
- address output
974
- format.date "year" output.check
975
- new.sentence
976
- organization output
977
- publisher output
978
- format.isbn output
979
- format.doi output
980
- format.url output
981
- new.block
982
- note output
983
- fin.entry
984
- }
985
-
986
- FUNCTION {techreport}
987
- { output.bibitem
988
- format.authors "author" output.check
989
- author format.key output
990
- new.block
991
- format.title "title" output.check
992
- new.block
993
- format.tr.number output.nonnull
994
- institution "institution" output.check
995
- address output
996
- format.date "year" output.check
997
- format.url output
998
- new.block
999
- note output
1000
- fin.entry
1001
- }
1002
-
1003
- FUNCTION {unpublished}
1004
- { output.bibitem
1005
- format.authors "author" output.check
1006
- author format.key output
1007
- new.block
1008
- format.title "title" output.check
1009
- new.block
1010
- note "note" output.check
1011
- format.date output
1012
- format.url output
1013
- fin.entry
1014
- }
1015
-
1016
- FUNCTION {default.type} { misc }
1017
-
1018
-
1019
- MACRO {jan} {"January"}
1020
-
1021
- MACRO {feb} {"February"}
1022
-
1023
- MACRO {mar} {"March"}
1024
-
1025
- MACRO {apr} {"April"}
1026
-
1027
- MACRO {may} {"May"}
1028
-
1029
- MACRO {jun} {"June"}
1030
-
1031
- MACRO {jul} {"July"}
1032
-
1033
- MACRO {aug} {"August"}
1034
-
1035
- MACRO {sep} {"September"}
1036
-
1037
- MACRO {oct} {"October"}
1038
-
1039
- MACRO {nov} {"November"}
1040
-
1041
- MACRO {dec} {"December"}
1042
-
1043
-
1044
-
1045
- MACRO {acmcs} {"ACM Computing Surveys"}
1046
-
1047
- MACRO {acta} {"Acta Informatica"}
1048
-
1049
- MACRO {cacm} {"Communications of the ACM"}
1050
-
1051
- MACRO {ibmjrd} {"IBM Journal of Research and Development"}
1052
-
1053
- MACRO {ibmsj} {"IBM Systems Journal"}
1054
-
1055
- MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
1056
-
1057
- MACRO {ieeetc} {"IEEE Transactions on Computers"}
1058
-
1059
- MACRO {ieeetcad}
1060
- {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
1061
-
1062
- MACRO {ipl} {"Information Processing Letters"}
1063
-
1064
- MACRO {jacm} {"Journal of the ACM"}
1065
-
1066
- MACRO {jcss} {"Journal of Computer and System Sciences"}
1067
-
1068
- MACRO {scp} {"Science of Computer Programming"}
1069
-
1070
- MACRO {sicomp} {"SIAM Journal on Computing"}
1071
-
1072
- MACRO {tocs} {"ACM Transactions on Computer Systems"}
1073
-
1074
- MACRO {tods} {"ACM Transactions on Database Systems"}
1075
-
1076
- MACRO {tog} {"ACM Transactions on Graphics"}
1077
-
1078
- MACRO {toms} {"ACM Transactions on Mathematical Software"}
1079
-
1080
- MACRO {toois} {"ACM Transactions on Office Information Systems"}
1081
-
1082
- MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
1083
-
1084
- MACRO {tcs} {"Theoretical Computer Science"}
1085
-
1086
-
1087
- READ
1088
-
1089
- FUNCTION {sortify}
1090
- { purify$
1091
- "l" change.case$
1092
- }
1093
-
1094
- INTEGERS { len }
1095
-
1096
- FUNCTION {chop.word}
1097
- { 's :=
1098
- 'len :=
1099
- s #1 len substring$ =
1100
- { s len #1 + global.max$ substring$ }
1101
- 's
1102
- if$
1103
- }
1104
-
1105
- FUNCTION {format.lab.names}
1106
- { 's :=
1107
- s #1 "{vv~}{ll}" format.name$
1108
- s num.names$ duplicate$
1109
- #2 >
1110
- { pop$ " et~al." * }
1111
- { #2 <
1112
- 'skip$
1113
- { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
1114
- { " et~al." * }
1115
- { " \& " * s #2 "{vv~}{ll}" format.name$ * }
1116
- if$
1117
- }
1118
- if$
1119
- }
1120
- if$
1121
- }
1122
-
1123
- FUNCTION {author.key.label}
1124
- { author empty$
1125
- { key empty$
1126
- { cite$ #1 #3 substring$ }
1127
- 'key
1128
- if$
1129
- }
1130
- { author format.lab.names }
1131
- if$
1132
- }
1133
-
1134
- FUNCTION {author.editor.key.label}
1135
- { author empty$
1136
- { editor empty$
1137
- { key empty$
1138
- { cite$ #1 #3 substring$ }
1139
- 'key
1140
- if$
1141
- }
1142
- { editor format.lab.names }
1143
- if$
1144
- }
1145
- { author format.lab.names }
1146
- if$
1147
- }
1148
-
1149
- FUNCTION {author.key.organization.label}
1150
- { author empty$
1151
- { key empty$
1152
- { organization empty$
1153
- { cite$ #1 #3 substring$ }
1154
- { "The " #4 organization chop.word #3 text.prefix$ }
1155
- if$
1156
- }
1157
- 'key
1158
- if$
1159
- }
1160
- { author format.lab.names }
1161
- if$
1162
- }
1163
-
1164
- FUNCTION {editor.key.organization.label}
1165
- { editor empty$
1166
- { key empty$
1167
- { organization empty$
1168
- { cite$ #1 #3 substring$ }
1169
- { "The " #4 organization chop.word #3 text.prefix$ }
1170
- if$
1171
- }
1172
- 'key
1173
- if$
1174
- }
1175
- { editor format.lab.names }
1176
- if$
1177
- }
1178
-
1179
- FUNCTION {calc.short.authors}
1180
- { type$ "book" =
1181
- type$ "inbook" =
1182
- or
1183
- 'author.editor.key.label
1184
- { type$ "proceedings" =
1185
- 'editor.key.organization.label
1186
- { type$ "manual" =
1187
- 'author.key.organization.label
1188
- 'author.key.label
1189
- if$
1190
- }
1191
- if$
1192
- }
1193
- if$
1194
- 'short.list :=
1195
- }
1196
-
1197
- FUNCTION {calc.label}
1198
- { calc.short.authors
1199
- short.list
1200
- "("
1201
- *
1202
- year duplicate$ empty$
1203
- short.list key field.or.null = or
1204
- { pop$ "" }
1205
- 'skip$
1206
- if$
1207
- *
1208
- 'label :=
1209
- }
1210
-
1211
- FUNCTION {sort.format.names}
1212
- { 's :=
1213
- #1 'nameptr :=
1214
- ""
1215
- s num.names$ 'numnames :=
1216
- numnames 'namesleft :=
1217
- { namesleft #0 > }
1218
- {
1219
- s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't :=
1220
- nameptr #1 >
1221
- {
1222
- " " *
1223
- namesleft #1 = t "others" = and
1224
- { "zzzzz" * }
1225
- { numnames #2 > nameptr #2 = and
1226
- { "zz" * year field.or.null * " " * }
1227
- 'skip$
1228
- if$
1229
- t sortify *
1230
- }
1231
- if$
1232
- }
1233
- { t sortify * }
1234
- if$
1235
- nameptr #1 + 'nameptr :=
1236
- namesleft #1 - 'namesleft :=
1237
- }
1238
- while$
1239
- }
1240
-
1241
- FUNCTION {sort.format.title}
1242
- { 't :=
1243
- "A " #2
1244
- "An " #3
1245
- "The " #4 t chop.word
1246
- chop.word
1247
- chop.word
1248
- sortify
1249
- #1 global.max$ substring$
1250
- }
1251
-
1252
- FUNCTION {author.sort}
1253
- { author empty$
1254
- { key empty$
1255
- { "to sort, need author or key in " cite$ * warning$
1256
- ""
1257
- }
1258
- { key sortify }
1259
- if$
1260
- }
1261
- { author sort.format.names }
1262
- if$
1263
- }
1264
-
1265
- FUNCTION {author.editor.sort}
1266
- { author empty$
1267
- { editor empty$
1268
- { key empty$
1269
- { "to sort, need author, editor, or key in " cite$ * warning$
1270
- ""
1271
- }
1272
- { key sortify }
1273
- if$
1274
- }
1275
- { editor sort.format.names }
1276
- if$
1277
- }
1278
- { author sort.format.names }
1279
- if$
1280
- }
1281
-
1282
- FUNCTION {author.organization.sort}
1283
- { author empty$
1284
- { organization empty$
1285
- { key empty$
1286
- { "to sort, need author, organization, or key in " cite$ * warning$
1287
- ""
1288
- }
1289
- { key sortify }
1290
- if$
1291
- }
1292
- { "The " #4 organization chop.word sortify }
1293
- if$
1294
- }
1295
- { author sort.format.names }
1296
- if$
1297
- }
1298
-
1299
- FUNCTION {editor.organization.sort}
1300
- { editor empty$
1301
- { organization empty$
1302
- { key empty$
1303
- { "to sort, need editor, organization, or key in " cite$ * warning$
1304
- ""
1305
- }
1306
- { key sortify }
1307
- if$
1308
- }
1309
- { "The " #4 organization chop.word sortify }
1310
- if$
1311
- }
1312
- { editor sort.format.names }
1313
- if$
1314
- }
1315
-
1316
-
1317
- FUNCTION {presort}
1318
- { calc.label
1319
- label sortify
1320
- " "
1321
- *
1322
- type$ "book" =
1323
- type$ "inbook" =
1324
- or
1325
- 'author.editor.sort
1326
- { type$ "proceedings" =
1327
- 'editor.organization.sort
1328
- { type$ "manual" =
1329
- 'author.organization.sort
1330
- 'author.sort
1331
- if$
1332
- }
1333
- if$
1334
- }
1335
- if$
1336
- " "
1337
- *
1338
- year field.or.null sortify
1339
- *
1340
- " "
1341
- *
1342
- cite$
1343
- *
1344
- #1 entry.max$ substring$
1345
- 'sort.label :=
1346
- sort.label *
1347
- #1 entry.max$ substring$
1348
- 'sort.key$ :=
1349
- }
1350
-
1351
- ITERATE {presort}
1352
-
1353
- SORT
1354
-
1355
- STRINGS { longest.label last.label next.extra }
1356
-
1357
- INTEGERS { longest.label.width last.extra.num number.label }
1358
-
1359
- FUNCTION {initialize.longest.label}
1360
- { "" 'longest.label :=
1361
- #0 int.to.chr$ 'last.label :=
1362
- "" 'next.extra :=
1363
- #0 'longest.label.width :=
1364
- #0 'last.extra.num :=
1365
- #0 'number.label :=
1366
- }
1367
-
1368
- FUNCTION {forward.pass}
1369
- { last.label label =
1370
- { last.extra.num #1 + 'last.extra.num :=
1371
- last.extra.num int.to.chr$ 'extra.label :=
1372
- }
1373
- { "a" chr.to.int$ 'last.extra.num :=
1374
- "" 'extra.label :=
1375
- label 'last.label :=
1376
- }
1377
- if$
1378
- number.label #1 + 'number.label :=
1379
- }
1380
-
1381
- FUNCTION {reverse.pass}
1382
- { next.extra "b" =
1383
- { "a" 'extra.label := }
1384
- 'skip$
1385
- if$
1386
- extra.label 'next.extra :=
1387
- extra.label
1388
- duplicate$ empty$
1389
- 'skip$
1390
- { "{\natexlab{" swap$ * "}}" * }
1391
- if$
1392
- 'extra.label :=
1393
- label extra.label * 'label :=
1394
- }
1395
-
1396
- EXECUTE {initialize.longest.label}
1397
-
1398
- ITERATE {forward.pass}
1399
-
1400
- REVERSE {reverse.pass}
1401
-
1402
- FUNCTION {bib.sort.order}
1403
- { sort.label 'sort.key$ :=
1404
- }
1405
-
1406
- ITERATE {bib.sort.order}
1407
-
1408
- SORT
1409
-
1410
- FUNCTION {begin.bib}
1411
- { preamble$ empty$
1412
- 'skip$
1413
- { preamble$ write$ newline$ }
1414
- if$
1415
- "\begin{thebibliography}{" number.label int.to.str$ * "}" *
1416
- write$ newline$
1417
- "\providecommand{\natexlab}[1]{#1}"
1418
- write$ newline$
1419
- "\providecommand{\url}[1]{\texttt{#1}}"
1420
- write$ newline$
1421
- "\expandafter\ifx\csname urlstyle\endcsname\relax"
1422
- write$ newline$
1423
- " \providecommand{\doi}[1]{doi: #1}\else"
1424
- write$ newline$
1425
- " \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi"
1426
- write$ newline$
1427
- }
1428
-
1429
- EXECUTE {begin.bib}
1430
-
1431
- EXECUTE {init.state.consts}
1432
-
1433
- ITERATE {call.type$}
1434
-
1435
- FUNCTION {end.bib}
1436
- { newline$
1437
- "\end{thebibliography}" write$ newline$
1438
- }
1439
-
1440
- EXECUTE {end.bib}
outputs/outputs_20230421_000752/iclr2022_conference.sty DELETED
@@ -1,245 +0,0 @@
1
- %%%% ICLR Macros (LaTex)
2
- %%%% Adapted by Hugo Larochelle from the NIPS stylefile Macros
3
- %%%% Style File
4
- %%%% Dec 12, 1990 Rev Aug 14, 1991; Sept, 1995; April, 1997; April, 1999; October 2014
5
-
6
- % This file can be used with Latex2e whether running in main mode, or
7
- % 2.09 compatibility mode.
8
- %
9
- % If using main mode, you need to include the commands
10
- % \documentclass{article}
11
- % \usepackage{iclr14submit_e,times}
12
- %
13
-
14
- % Change the overall width of the page. If these parameters are
15
- % changed, they will require corresponding changes in the
16
- % maketitle section.
17
- %
18
- \usepackage{eso-pic} % used by \AddToShipoutPicture
19
- \RequirePackage{fancyhdr}
20
- \RequirePackage{natbib}
21
-
22
- % modification to natbib citations
23
- \setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}}
24
-
25
- \renewcommand{\topfraction}{0.95} % let figure take up nearly whole page
26
- \renewcommand{\textfraction}{0.05} % let figure take up nearly whole page
27
-
28
- % Define iclrfinal, set to true if iclrfinalcopy is defined
29
- \newif\ificlrfinal
30
- \iclrfinalfalse
31
- \def\iclrfinalcopy{\iclrfinaltrue}
32
- \font\iclrtenhv = phvb at 8pt
33
-
34
- % Specify the dimensions of each page
35
-
36
- \setlength{\paperheight}{11in}
37
- \setlength{\paperwidth}{8.5in}
38
-
39
-
40
- \oddsidemargin .5in % Note \oddsidemargin = \evensidemargin
41
- \evensidemargin .5in
42
- \marginparwidth 0.07 true in
43
- %\marginparwidth 0.75 true in
44
- %\topmargin 0 true pt % Nominal distance from top of page to top of
45
- %\topmargin 0.125in
46
- \topmargin -0.625in
47
- \addtolength{\headsep}{0.25in}
48
- \textheight 9.0 true in % Height of text (including footnotes & figures)
49
- \textwidth 5.5 true in % Width of text line.
50
- \widowpenalty=10000
51
- \clubpenalty=10000
52
-
53
- % \thispagestyle{empty} \pagestyle{empty}
54
- \flushbottom \sloppy
55
-
56
- % We're never going to need a table of contents, so just flush it to
57
- % save space --- suggested by drstrip@sandia-2
58
- \def\addcontentsline#1#2#3{}
59
-
60
- % Title stuff, taken from deproc.
61
- \def\maketitle{\par
62
- \begingroup
63
- \def\thefootnote{\fnsymbol{footnote}}
64
- \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} % for perfect author
65
- % name centering
66
- % The footnote-mark was overlapping the footnote-text,
67
- % added the following to fix this problem (MK)
68
- \long\def\@makefntext##1{\parindent 1em\noindent
69
- \hbox to1.8em{\hss $\m@th ^{\@thefnmark}$}##1}
70
- \@maketitle \@thanks
71
- \endgroup
72
- \setcounter{footnote}{0}
73
- \let\maketitle\relax \let\@maketitle\relax
74
- \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
75
-
76
- % The toptitlebar has been raised to top-justify the first page
77
-
78
- \usepackage{fancyhdr}
79
- \pagestyle{fancy}
80
- \fancyhead{}
81
-
82
- % Title (includes both anonimized and non-anonimized versions)
83
- \def\@maketitle{\vbox{\hsize\textwidth
84
- %\linewidth\hsize \vskip 0.1in \toptitlebar \centering
85
- {\LARGE\sc \@title\par}
86
- %\bottomtitlebar % \vskip 0.1in % minus
87
- \ificlrfinal
88
- \lhead{Published as a conference paper at ICLR 2022}
89
- \def\And{\end{tabular}\hfil\linebreak[0]\hfil
90
- \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
91
- \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
92
- \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
93
- \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\@author\end{tabular}%
94
- \else
95
- \lhead{Under review as a conference paper at ICLR 2022}
96
- \def\And{\end{tabular}\hfil\linebreak[0]\hfil
97
- \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
98
- \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
99
- \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
100
- \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}Anonymous authors\\Paper under double-blind review\end{tabular}%
101
- \fi
102
- \vskip 0.3in minus 0.1in}}
103
-
104
- \renewenvironment{abstract}{\vskip.075in\centerline{\large\sc
105
- Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
106
-
107
- % sections with less space
108
- \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
109
- -0.5ex minus -.2ex}{1.5ex plus 0.3ex
110
- minus0.2ex}{\large\sc\raggedright}}
111
-
112
- \def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
113
- -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\sc\raggedright}}
114
- \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex
115
- plus -0.5ex minus -.2ex}{0.5ex plus
116
- .2ex}{\normalsize\sc\raggedright}}
117
- \def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
118
- 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
119
- \def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus
120
- 0.5ex minus .2ex}{-1em}{\normalsize\sc}}
121
- \def\subsubsubsection{\vskip
122
- 5pt{\noindent\normalsize\rm\raggedright}}
123
-
124
-
125
- % Footnotes
126
- \footnotesep 6.65pt %
127
- \skip\footins 9pt plus 4pt minus 2pt
128
- \def\footnoterule{\kern-3pt \hrule width 12pc \kern 2.6pt }
129
- \setcounter{footnote}{0}
130
-
131
- % Lists and paragraphs
132
- \parindent 0pt
133
- \topsep 4pt plus 1pt minus 2pt
134
- \partopsep 1pt plus 0.5pt minus 0.5pt
135
- \itemsep 2pt plus 1pt minus 0.5pt
136
- \parsep 2pt plus 1pt minus 0.5pt
137
- \parskip .5pc
138
-
139
-
140
- %\leftmargin2em
141
- \leftmargin3pc
142
- \leftmargini\leftmargin \leftmarginii 2em
143
- \leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em
144
-
145
- %\labelsep \labelsep 5pt
146
-
147
- \def\@listi{\leftmargin\leftmargini}
148
- \def\@listii{\leftmargin\leftmarginii
149
- \labelwidth\leftmarginii\advance\labelwidth-\labelsep
150
- \topsep 2pt plus 1pt minus 0.5pt
151
- \parsep 1pt plus 0.5pt minus 0.5pt
152
- \itemsep \parsep}
153
- \def\@listiii{\leftmargin\leftmarginiii
154
- \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
155
- \topsep 1pt plus 0.5pt minus 0.5pt
156
- \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
157
- \itemsep \topsep}
158
- \def\@listiv{\leftmargin\leftmarginiv
159
- \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
160
- \def\@listv{\leftmargin\leftmarginv
161
- \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
162
- \def\@listvi{\leftmargin\leftmarginvi
163
- \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
164
-
165
- \abovedisplayskip 7pt plus2pt minus5pt%
166
- \belowdisplayskip \abovedisplayskip
167
- \abovedisplayshortskip 0pt plus3pt%
168
- \belowdisplayshortskip 4pt plus3pt minus3pt%
169
-
170
- % Less leading in most fonts (due to the narrow columns)
171
- % The choices were between 1-pt and 1.5-pt leading
172
- %\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} % got rid of @ (MK)
173
- \def\normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
174
- \def\small{\@setsize\small{10pt}\ixpt\@ixpt}
175
- \def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
176
- \def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
177
- \def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
178
- \def\large{\@setsize\large{14pt}\xiipt\@xiipt}
179
- \def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
180
- \def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
181
- \def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
182
- \def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
183
-
184
- \def\toptitlebar{\hrule height4pt\vskip .25in\vskip-\parskip}
185
-
186
- \def\bottomtitlebar{\vskip .29in\vskip-\parskip\hrule height1pt\vskip
187
- .09in} %
188
- %Reduced second vskip to compensate for adding the strut in \@author
189
-
190
-
191
- %% % Vertical Ruler
192
- %% % This code is, largely, from the CVPR 2010 conference style file
193
- %% % ----- define vruler
194
- %% \makeatletter
195
- %% \newbox\iclrrulerbox
196
- %% \newcount\iclrrulercount
197
- %% \newdimen\iclrruleroffset
198
- %% \newdimen\cv@lineheight
199
- %% \newdimen\cv@boxheight
200
- %% \newbox\cv@tmpbox
201
- %% \newcount\cv@refno
202
- %% \newcount\cv@tot
203
- %% % NUMBER with left flushed zeros \fillzeros[<WIDTH>]<NUMBER>
204
- %% \newcount\cv@tmpc@ \newcount\cv@tmpc
205
- %% \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
206
- %% \cv@tmpc=1 %
207
- %% \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
208
- %% \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
209
- %% \ifnum#2<0\advance\cv@tmpc1\relax-\fi
210
- %% \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
211
- %% \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
212
- %% % \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
213
- %% \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip
214
- %% \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
215
- %% \global\setbox\iclrrulerbox=\vbox to \textheight{%
216
- %% {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
217
- %% \cv@lineheight=#1\global\iclrrulercount=#2%
218
- %% \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
219
- %% \cv@refno1\vskip-\cv@lineheight\vskip1ex%
220
- %% \loop\setbox\cv@tmpbox=\hbox to0cm{{\iclrtenhv\hfil\fillzeros[#4]\iclrrulercount}}%
221
- %% \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
222
- %% \advance\cv@refno1\global\advance\iclrrulercount#3\relax
223
- %% \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}%
224
- %% \makeatother
225
- %% % ----- end of vruler
226
-
227
- %% % \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
228
- %% \def\iclrruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\iclrrulerbox}}
229
- %% \AddToShipoutPicture{%
230
- %% \ificlrfinal\else
231
- %% \iclrruleroffset=\textheight
232
- %% \advance\iclrruleroffset by -3.7pt
233
- %% \color[rgb]{.7,.7,.7}
234
- %% \AtTextUpperLeft{%
235
- %% \put(\LenToUnit{-35pt},\LenToUnit{-\iclrruleroffset}){%left ruler
236
- %% \iclrruler{\iclrrulercount}}
237
- %% }
238
- %% \fi
239
- %% }
240
- %%% To add a vertical bar on the side
241
- %\AddToShipoutPicture{
242
- %\AtTextLowerLeft{
243
- %\hspace*{-1.8cm}
244
- %\colorbox[rgb]{0.7,0.7,0.7}{\small \parbox[b][\textheight]{0.1cm}{}}}
245
- %}
outputs/outputs_20230421_000752/introduction.tex DELETED
@@ -1,10 +0,0 @@
1
- \section{introduction}
2
- Reinforcement Learning (RL) has emerged as a significant research area in the field of artificial intelligence, with a wide range of applications in robotics, finance, healthcare, and gaming \cite{2108.11510}. The primary goal of RL is to develop algorithms that allow agents to learn optimal policies through interaction with their environment, maximizing the cumulative reward over time \cite{1708.05866}. Despite the considerable progress made in recent years, RL still faces several challenges, such as the trade-off between exploration and exploitation, the curse of dimensionality, and the need for efficient algorithms that can handle large-scale and complex problems \cite{1906.10025}.
3
-
4
- One of the major breakthroughs in RL has been the development of Q-learning algorithms, which have been proven to converge to the optimal solution \cite{2303.08631}. However, Q-learning is known to suffer from overestimation bias, leading to suboptimal performance and slow convergence in some cases \cite{2106.14642}. To address this issue, researchers have proposed various modifications and extensions to Q-learning, such as Double Q-learning \cite{1511.02377} and Self-correcting Q-learning \cite{2012.01100}, which aim to mitigate the overestimation bias while maintaining convergence guarantees.
5
-
6
- Another essential aspect of RL research is the incorporation of deep learning techniques, giving rise to the field of Deep Reinforcement Learning (DRL) \cite{1709.05067}. DRL has demonstrated remarkable success in various domains, such as playing video games directly from pixels and learning control policies for robots \cite{1708.05866}. However, DRL algorithms often require a large amount of data and computational resources, which limits their applicability in real-world scenarios \cite{1906.10025}. To overcome these limitations, researchers have proposed various approaches, including distributed DRL \cite{2212.00253} and expert-guided DRL \cite{2106.14642}, which aim to improve the sample efficiency and scalability of DRL algorithms.
7
-
8
- Related work in the field of RL has also focused on the development of policy gradient methods, which optimize the policy directly by following the gradient of the expected return \cite{1811.09013}. These methods have been particularly successful in continuous action settings and have led to the development of algorithms such as Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. However, policy gradient methods often require on-policy data, which can be inefficient in terms of sample complexity \cite{1911.04817}.
9
-
10
- In summary, this survey aims to provide a comprehensive overview of the current state of Reinforcement Learning, focusing on the challenges and recent advances in Q-learning, Deep Reinforcement Learning, and policy gradient methods. By examining the key algorithms, techniques, and applications in these areas, we hope to shed light on the current limitations and future research directions in the field of RL.
outputs/outputs_20230421_000752/main.aux DELETED
@@ -1,92 +0,0 @@
1
- \relax
2
- \providecommand\hyper@newdestlabel[2]{}
3
- \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
4
- \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
5
- \global\let\oldcontentsline\contentsline
6
- \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
7
- \global\let\oldnewlabel\newlabel
8
- \gdef\newlabel#1#2{\newlabelxx{#1}#2}
9
- \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
10
- \AtEndDocument{\ifx\hyper@anchor\@undefined
11
- \let\contentsline\oldcontentsline
12
- \let\newlabel\oldnewlabel
13
- \fi}
14
- \fi}
15
- \global\let\hyper@last\relax
16
- \gdef\HyperFirstAtBeginDocument#1{#1}
17
- \providecommand\HyField@AuxAddToFields[1]{}
18
- \providecommand\HyField@AuxAddToCoFields[2]{}
19
- \citation{2108.11510}
20
- \citation{1708.05866}
21
- \citation{1906.10025}
22
- \citation{2303.08631}
23
- \citation{2106.14642}
24
- \citation{1511.02377}
25
- \citation{2012.01100}
26
- \citation{1709.05067}
27
- \citation{1708.05866}
28
- \citation{1906.10025}
29
- \citation{2212.00253}
30
- \citation{2106.14642}
31
- \citation{1811.09013}
32
- \citation{2209.01820}
33
- \citation{1911.04817}
34
- \citation{1512.07669}
35
- \citation{1511.02377}
36
- \citation{1512.09075}
37
- \citation{2008.10426}
38
- \citation{0711.2185}
39
- \@writefile{toc}{\contentsline {section}{\numberline {1}introduction}{1}{section.1}\protected@file@percent }
40
- \@writefile{toc}{\contentsline {section}{\numberline {2}related works}{1}{section.2}\protected@file@percent }
41
- \@writefile{toc}{\contentsline {paragraph}{Markov Decision Processes:}{1}{section*.1}\protected@file@percent }
42
- \citation{2303.08631}
43
- \citation{2303.08631}
44
- \citation{2012.01100}
45
- \citation{2106.14642}
46
- \citation{2209.01820}
47
- \citation{1811.09013}
48
- \citation{2108.11510}
49
- \citation{1708.05866}
50
- \citation{1906.10025}
51
- \citation{2111.01334}
52
- \citation{1512.09075}
53
- \citation{1511.02377}
54
- \citation{1512.07669}
55
- \@writefile{toc}{\contentsline {paragraph}{Q-Learning and Variants:}{2}{section*.2}\protected@file@percent }
56
- \@writefile{toc}{\contentsline {paragraph}{Expert Q-Learning:}{2}{section*.3}\protected@file@percent }
57
- \@writefile{toc}{\contentsline {paragraph}{Policy Gradient Methods:}{2}{section*.4}\protected@file@percent }
58
- \@writefile{toc}{\contentsline {paragraph}{Deep Reinforcement Learning:}{2}{section*.5}\protected@file@percent }
59
- \@writefile{toc}{\contentsline {paragraph}{Temporal Networks:}{2}{section*.6}\protected@file@percent }
60
- \@writefile{toc}{\contentsline {section}{\numberline {3}backgrounds}{2}{section.3}\protected@file@percent }
61
- \@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Problem Statement and Foundational Concepts}{2}{subsection.3.1}\protected@file@percent }
62
- \citation{2303.08631}
63
- \citation{2303.08631}
64
- \citation{2106.14642}
65
- \citation{2303.08631}
66
- \citation{2106.14642}
67
- \citation{1703.02102}
68
- \citation{1811.09013}
69
- \citation{2209.01820}
70
- \bibdata{ref}
71
- \bibcite{0711.2185}{{1}{2007}{{Arie~Leizarowitz}}{{}}}
72
- \bibcite{2303.08631}{{2}{2023}{{Barber}}{{}}}
73
- \bibcite{1811.09013}{{3}{2018}{{Ehsan~Imani}}{{}}}
74
- \bibcite{1511.02377}{{4}{2015}{{Ehud~Lehrer}}{{}}}
75
- \bibcite{1708.05866}{{5}{2017}{{Kai~Arulkumaran}}{{}}}
76
- \bibcite{1512.07669}{{6}{2015}{{Krishnamurthy}}{{}}}
77
- \@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Q-Learning and Related Algorithms}{3}{subsection.3.2}\protected@file@percent }
78
- \@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Policy Gradient Methods}{3}{subsection.3.3}\protected@file@percent }
79
- \@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Methodology and Evaluation Metrics}{3}{subsection.3.4}\protected@file@percent }
80
- \bibcite{1911.04817}{{7}{2019}{{Kämmerer}}{{}}}
81
- \bibcite{2106.14642}{{8}{2021}{{Li~Meng}}{{}}}
82
- \bibcite{1709.05067}{{9}{2017}{{Mahipal~Jadeja}}{{}}}
83
- \bibcite{2008.10426}{{10}{2020}{{Nathalie~Bertrand}}{{}}}
84
- \bibcite{2108.11510}{{11}{2021}{{Ngan~Le}}{{}}}
85
- \bibcite{1512.09075}{{12}{2015}{{Philip S.~Thomas}}{{}}}
86
- \bibcite{2212.00253}{{13}{2022}{{Qiyue~Yin}}{{}}}
87
- \bibcite{2012.01100}{{14}{2020}{{Rong~Zhu}}{{}}}
88
- \bibcite{1906.10025}{{15}{2019}{{Sergey~Ivanov}}{{}}}
89
- \bibcite{2209.01820}{{16}{2022}{{van Heeswijk}}{{}}}
90
- \bibcite{2111.01334}{{17}{2021}{{Xiu-Xiu~Zhan}}{{}}}
91
- \bibcite{1703.02102}{{18}{2017}{{Yemi~Okesanjo}}{{}}}
92
- \bibstyle{iclr2022_conference}
outputs/outputs_20230421_000752/main.bbl DELETED
@@ -1,122 +0,0 @@
1
- \begin{thebibliography}{18}
2
- \providecommand{\natexlab}[1]{#1}
3
- \providecommand{\url}[1]{\texttt{#1}}
4
- \expandafter\ifx\csname urlstyle\endcsname\relax
5
- \providecommand{\doi}[1]{doi: #1}\else
6
- \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi
7
-
8
- \bibitem[Arie~Leizarowitz(2007)]{0711.2185}
9
- Adam~Shwartz Arie~Leizarowitz.
10
- \newblock Exact finite approximations of average-cost countable markov decision
11
- processes.
12
- \newblock \emph{arXiv preprint arXiv:0711.2185}, 2007.
13
- \newblock URL \url{http://arxiv.org/abs/0711.2185v1}.
14
-
15
- \bibitem[Barber(2023)]{2303.08631}
16
- David Barber.
17
- \newblock Smoothed q-learning.
18
- \newblock \emph{arXiv preprint arXiv:2303.08631}, 2023.
19
- \newblock URL \url{http://arxiv.org/abs/2303.08631v1}.
20
-
21
- \bibitem[Ehsan~Imani(2018)]{1811.09013}
22
- Martha~White Ehsan~Imani, Eric~Graves.
23
- \newblock An off-policy policy gradient theorem using emphatic weightings.
24
- \newblock \emph{arXiv preprint arXiv:1811.09013}, 2018.
25
- \newblock URL \url{http://arxiv.org/abs/1811.09013v2}.
26
-
27
- \bibitem[Ehud~Lehrer(2015)]{1511.02377}
28
- Omri N.~Solan Ehud~Lehrer, Eilon~Solan.
29
- \newblock The value functions of markov decision processes.
30
- \newblock \emph{arXiv preprint arXiv:1511.02377}, 2015.
31
- \newblock URL \url{http://arxiv.org/abs/1511.02377v1}.
32
-
33
- \bibitem[Kai~Arulkumaran(2017)]{1708.05866}
34
- Miles Brundage Anil Anthony~Bharath Kai~Arulkumaran, Marc Peter~Deisenroth.
35
- \newblock A brief survey of deep reinforcement learning.
36
- \newblock \emph{arXiv preprint arXiv:1708.05866}, 2017.
37
- \newblock URL \url{http://arxiv.org/abs/1708.05866v2}.
38
-
39
- \bibitem[Krishnamurthy(2015)]{1512.07669}
40
- Vikram Krishnamurthy.
41
- \newblock Reinforcement learning: Stochastic approximation algorithms for
42
- markov decision processes.
43
- \newblock \emph{arXiv preprint arXiv:1512.07669}, 2015.
44
- \newblock URL \url{http://arxiv.org/abs/1512.07669v1}.
45
-
46
- \bibitem[Kämmerer(2019)]{1911.04817}
47
- Mattis~Manfred Kämmerer.
48
- \newblock On policy gradients.
49
- \newblock \emph{arXiv preprint arXiv:1911.04817}, 2019.
50
- \newblock URL \url{http://arxiv.org/abs/1911.04817v1}.
51
-
52
- \bibitem[Li~Meng(2021)]{2106.14642}
53
- Morten Goodwin Paal~Engelstad Li~Meng, Anis~Yazidi.
54
- \newblock Expert q-learning: Deep reinforcement learning with coarse state
55
- values from offline expert examples.
56
- \newblock \emph{arXiv preprint arXiv:2106.14642}, 2021.
57
- \newblock URL \url{http://arxiv.org/abs/2106.14642v3}.
58
-
59
- \bibitem[Mahipal~Jadeja(2017)]{1709.05067}
60
- Agam~Shah Mahipal~Jadeja, Neelanshi~Varia.
61
- \newblock Deep reinforcement learning for conversational ai.
62
- \newblock \emph{arXiv preprint arXiv:1709.05067}, 2017.
63
- \newblock URL \url{http://arxiv.org/abs/1709.05067v1}.
64
-
65
- \bibitem[Nathalie~Bertrand(2020)]{2008.10426}
66
- Thomas Brihaye Paulin~Fournier Nathalie~Bertrand, Patricia~Bouyer.
67
- \newblock Taming denumerable markov decision processes with decisiveness.
68
- \newblock \emph{arXiv preprint arXiv:2008.10426}, 2020.
69
- \newblock URL \url{http://arxiv.org/abs/2008.10426v1}.
70
-
71
- \bibitem[Ngan~Le(2021)]{2108.11510}
72
- Kashu Yamazaki Khoa Luu Marios~Savvides Ngan~Le, Vidhiwar Singh~Rathour.
73
- \newblock Deep reinforcement learning in computer vision: A comprehensive
74
- survey.
75
- \newblock \emph{arXiv preprint arXiv:2108.11510}, 2021.
76
- \newblock URL \url{http://arxiv.org/abs/2108.11510v1}.
77
-
78
- \bibitem[Philip S.~Thomas(2015)]{1512.09075}
79
- Billy~Okal Philip S.~Thomas.
80
- \newblock A notation for markov decision processes.
81
- \newblock \emph{arXiv preprint arXiv:1512.09075}, 2015.
82
- \newblock URL \url{http://arxiv.org/abs/1512.09075v2}.
83
-
84
- \bibitem[Qiyue~Yin(2022)]{2212.00253}
85
- Shengqi Shen Jun Yang Meijing Zhao Kaiqi Huang Bin Liang Liang~Wang Qiyue~Yin,
86
- Tongtong~Yu.
87
- \newblock Distributed deep reinforcement learning: A survey and a multi-player
88
- multi-agent learning toolbox.
89
- \newblock \emph{arXiv preprint arXiv:2212.00253}, 2022.
90
- \newblock URL \url{http://arxiv.org/abs/2212.00253v1}.
91
-
92
- \bibitem[Rong~Zhu(2020)]{2012.01100}
93
- Mattia~Rigotti Rong~Zhu.
94
- \newblock Self-correcting q-learning.
95
- \newblock \emph{arXiv preprint arXiv:2012.01100}, 2020.
96
- \newblock URL \url{http://arxiv.org/abs/2012.01100v2}.
97
-
98
- \bibitem[Sergey~Ivanov(2019)]{1906.10025}
99
- Alexander~D'yakonov Sergey~Ivanov.
100
- \newblock Modern deep reinforcement learning algorithms.
101
- \newblock \emph{arXiv preprint arXiv:1906.10025}, 2019.
102
- \newblock URL \url{http://arxiv.org/abs/1906.10025v2}.
103
-
104
- \bibitem[van Heeswijk(2022)]{2209.01820}
105
- W.~J.~A. van Heeswijk.
106
- \newblock Natural policy gradients in reinforcement learning explained.
107
- \newblock \emph{arXiv preprint arXiv:2209.01820}, 2022.
108
- \newblock URL \url{http://arxiv.org/abs/2209.01820v1}.
109
-
110
- \bibitem[Xiu-Xiu~Zhan(2021)]{2111.01334}
111
- Zhipeng Wang Huijuang Wang Petter Holme Zi-Ke~Zhang Xiu-Xiu~Zhan, Chuang~Liu.
112
- \newblock Measuring and utilizing temporal network dissimilarity.
113
- \newblock \emph{arXiv preprint arXiv:2111.01334}, 2021.
114
- \newblock URL \url{http://arxiv.org/abs/2111.01334v1}.
115
-
116
- \bibitem[Yemi~Okesanjo(2017)]{1703.02102}
117
- Victor~Kofia Yemi~Okesanjo.
118
- \newblock Revisiting stochastic off-policy action-value gradients.
119
- \newblock \emph{arXiv preprint arXiv:1703.02102}, 2017.
120
- \newblock URL \url{http://arxiv.org/abs/1703.02102v2}.
121
-
122
- \end{thebibliography}
outputs/outputs_20230421_000752/main.blg DELETED
@@ -1,935 +0,0 @@
1
- This is BibTeX, Version 0.99d (TeX Live 2019/W32TeX)
2
- Capacity: max_strings=200000, hash_size=200000, hash_prime=170003
3
- The top-level auxiliary file: main.aux
4
- The style file: iclr2022_conference.bst
5
- Database file #1: ref.bib
6
- Repeated entry---line 19 of file ref.bib
7
- : @article{1512.07669
8
- : ,
9
- I'm skipping whatever remains of this entry
10
- Repeated entry---line 53 of file ref.bib
11
- : @article{1512.07669
12
- : ,
13
- I'm skipping whatever remains of this entry
14
- Repeated entry---line 71 of file ref.bib
15
- : @article{1511.02377
16
- : ,
17
- I'm skipping whatever remains of this entry
18
- Repeated entry---line 103 of file ref.bib
19
- : @article{1512.07669
20
- : ,
21
- I'm skipping whatever remains of this entry
22
- Repeated entry---line 121 of file ref.bib
23
- : @article{1511.02377
24
- : ,
25
- I'm skipping whatever remains of this entry
26
- Repeated entry---line 137 of file ref.bib
27
- : @article{1512.09075
28
- : ,
29
- I'm skipping whatever remains of this entry
30
- Repeated entry---line 169 of file ref.bib
31
- : @article{1512.07669
32
- : ,
33
- I'm skipping whatever remains of this entry
34
- Repeated entry---line 187 of file ref.bib
35
- : @article{1511.02377
36
- : ,
37
- I'm skipping whatever remains of this entry
38
- Repeated entry---line 203 of file ref.bib
39
- : @article{1512.09075
40
- : ,
41
- I'm skipping whatever remains of this entry
42
- Repeated entry---line 219 of file ref.bib
43
- : @article{2008.10426
44
- : ,
45
- I'm skipping whatever remains of this entry
46
- Repeated entry---line 253 of file ref.bib
47
- : @article{1512.07669
48
- : ,
49
- I'm skipping whatever remains of this entry
50
- Repeated entry---line 271 of file ref.bib
51
- : @article{1511.02377
52
- : ,
53
- I'm skipping whatever remains of this entry
54
- Repeated entry---line 287 of file ref.bib
55
- : @article{1512.09075
56
- : ,
57
- I'm skipping whatever remains of this entry
58
- Repeated entry---line 303 of file ref.bib
59
- : @article{2008.10426
60
- : ,
61
- I'm skipping whatever remains of this entry
62
- Repeated entry---line 319 of file ref.bib
63
- : @article{0711.2185
64
- : ,
65
- I'm skipping whatever remains of this entry
66
- Repeated entry---line 353 of file ref.bib
67
- : @article{1512.07669
68
- : ,
69
- I'm skipping whatever remains of this entry
70
- Repeated entry---line 371 of file ref.bib
71
- : @article{1511.02377
72
- : ,
73
- I'm skipping whatever remains of this entry
74
- Repeated entry---line 387 of file ref.bib
75
- : @article{1512.09075
76
- : ,
77
- I'm skipping whatever remains of this entry
78
- Repeated entry---line 403 of file ref.bib
79
- : @article{2008.10426
80
- : ,
81
- I'm skipping whatever remains of this entry
82
- Repeated entry---line 419 of file ref.bib
83
- : @article{0711.2185
84
- : ,
85
- I'm skipping whatever remains of this entry
86
- Repeated entry---line 437 of file ref.bib
87
- : @article{2303.08631
88
- : ,
89
- I'm skipping whatever remains of this entry
90
- Repeated entry---line 471 of file ref.bib
91
- : @article{1512.07669
92
- : ,
93
- I'm skipping whatever remains of this entry
94
- Repeated entry---line 489 of file ref.bib
95
- : @article{1511.02377
96
- : ,
97
- I'm skipping whatever remains of this entry
98
- Repeated entry---line 505 of file ref.bib
99
- : @article{1512.09075
100
- : ,
101
- I'm skipping whatever remains of this entry
102
- Repeated entry---line 521 of file ref.bib
103
- : @article{2008.10426
104
- : ,
105
- I'm skipping whatever remains of this entry
106
- Repeated entry---line 537 of file ref.bib
107
- : @article{0711.2185
108
- : ,
109
- I'm skipping whatever remains of this entry
110
- Repeated entry---line 555 of file ref.bib
111
- : @article{2303.08631
112
- : ,
113
- I'm skipping whatever remains of this entry
114
- Repeated entry---line 571 of file ref.bib
115
- : @article{2106.14642
116
- : ,
117
- I'm skipping whatever remains of this entry
118
- Repeated entry---line 605 of file ref.bib
119
- : @article{1512.07669
120
- : ,
121
- I'm skipping whatever remains of this entry
122
- Repeated entry---line 623 of file ref.bib
123
- : @article{1511.02377
124
- : ,
125
- I'm skipping whatever remains of this entry
126
- Repeated entry---line 639 of file ref.bib
127
- : @article{1512.09075
128
- : ,
129
- I'm skipping whatever remains of this entry
130
- Repeated entry---line 655 of file ref.bib
131
- : @article{2008.10426
132
- : ,
133
- I'm skipping whatever remains of this entry
134
- Repeated entry---line 671 of file ref.bib
135
- : @article{0711.2185
136
- : ,
137
- I'm skipping whatever remains of this entry
138
- Repeated entry---line 689 of file ref.bib
139
- : @article{2303.08631
140
- : ,
141
- I'm skipping whatever remains of this entry
142
- Repeated entry---line 705 of file ref.bib
143
- : @article{2106.14642
144
- : ,
145
- I'm skipping whatever remains of this entry
146
- Repeated entry---line 755 of file ref.bib
147
- : @article{1512.07669
148
- : ,
149
- I'm skipping whatever remains of this entry
150
- Repeated entry---line 773 of file ref.bib
151
- : @article{1511.02377
152
- : ,
153
- I'm skipping whatever remains of this entry
154
- Repeated entry---line 789 of file ref.bib
155
- : @article{1512.09075
156
- : ,
157
- I'm skipping whatever remains of this entry
158
- Repeated entry---line 805 of file ref.bib
159
- : @article{2008.10426
160
- : ,
161
- I'm skipping whatever remains of this entry
162
- Repeated entry---line 821 of file ref.bib
163
- : @article{0711.2185
164
- : ,
165
- I'm skipping whatever remains of this entry
166
- Repeated entry---line 839 of file ref.bib
167
- : @article{2303.08631
168
- : ,
169
- I'm skipping whatever remains of this entry
170
- Repeated entry---line 855 of file ref.bib
171
- : @article{2106.14642
172
- : ,
173
- I'm skipping whatever remains of this entry
174
- Repeated entry---line 889 of file ref.bib
175
- : @article{2012.01100
176
- : ,
177
- I'm skipping whatever remains of this entry
178
- Repeated entry---line 921 of file ref.bib
179
- : @article{1512.07669
180
- : ,
181
- I'm skipping whatever remains of this entry
182
- Repeated entry---line 939 of file ref.bib
183
- : @article{1511.02377
184
- : ,
185
- I'm skipping whatever remains of this entry
186
- Repeated entry---line 955 of file ref.bib
187
- : @article{1512.09075
188
- : ,
189
- I'm skipping whatever remains of this entry
190
- Repeated entry---line 971 of file ref.bib
191
- : @article{2008.10426
192
- : ,
193
- I'm skipping whatever remains of this entry
194
- Repeated entry---line 987 of file ref.bib
195
- : @article{0711.2185
196
- : ,
197
- I'm skipping whatever remains of this entry
198
- Repeated entry---line 1005 of file ref.bib
199
- : @article{2303.08631
200
- : ,
201
- I'm skipping whatever remains of this entry
202
- Repeated entry---line 1021 of file ref.bib
203
- : @article{2106.14642
204
- : ,
205
- I'm skipping whatever remains of this entry
206
- Repeated entry---line 1055 of file ref.bib
207
- : @article{2012.01100
208
- : ,
209
- I'm skipping whatever remains of this entry
210
- Repeated entry---line 1071 of file ref.bib
211
- : @article{1703.02102
212
- : ,
213
- I'm skipping whatever remains of this entry
214
- Repeated entry---line 1103 of file ref.bib
215
- : @article{1512.07669
216
- : ,
217
- I'm skipping whatever remains of this entry
218
- Repeated entry---line 1121 of file ref.bib
219
- : @article{1511.02377
220
- : ,
221
- I'm skipping whatever remains of this entry
222
- Repeated entry---line 1137 of file ref.bib
223
- : @article{1512.09075
224
- : ,
225
- I'm skipping whatever remains of this entry
226
- Repeated entry---line 1153 of file ref.bib
227
- : @article{2008.10426
228
- : ,
229
- I'm skipping whatever remains of this entry
230
- Repeated entry---line 1169 of file ref.bib
231
- : @article{0711.2185
232
- : ,
233
- I'm skipping whatever remains of this entry
234
- Repeated entry---line 1187 of file ref.bib
235
- : @article{2303.08631
236
- : ,
237
- I'm skipping whatever remains of this entry
238
- Repeated entry---line 1203 of file ref.bib
239
- : @article{2106.14642
240
- : ,
241
- I'm skipping whatever remains of this entry
242
- Repeated entry---line 1237 of file ref.bib
243
- : @article{2012.01100
244
- : ,
245
- I'm skipping whatever remains of this entry
246
- Repeated entry---line 1253 of file ref.bib
247
- : @article{1703.02102
248
- : ,
249
- I'm skipping whatever remains of this entry
250
- Repeated entry---line 1269 of file ref.bib
251
- : @article{2209.01820
252
- : ,
253
- I'm skipping whatever remains of this entry
254
- Repeated entry---line 1301 of file ref.bib
255
- : @article{1512.07669
256
- : ,
257
- I'm skipping whatever remains of this entry
258
- Repeated entry---line 1319 of file ref.bib
259
- : @article{1511.02377
260
- : ,
261
- I'm skipping whatever remains of this entry
262
- Repeated entry---line 1335 of file ref.bib
263
- : @article{1512.09075
264
- : ,
265
- I'm skipping whatever remains of this entry
266
- Repeated entry---line 1351 of file ref.bib
267
- : @article{2008.10426
268
- : ,
269
- I'm skipping whatever remains of this entry
270
- Repeated entry---line 1367 of file ref.bib
271
- : @article{0711.2185
272
- : ,
273
- I'm skipping whatever remains of this entry
274
- Repeated entry---line 1385 of file ref.bib
275
- : @article{2303.08631
276
- : ,
277
- I'm skipping whatever remains of this entry
278
- Repeated entry---line 1401 of file ref.bib
279
- : @article{2106.14642
280
- : ,
281
- I'm skipping whatever remains of this entry
282
- Repeated entry---line 1435 of file ref.bib
283
- : @article{2012.01100
284
- : ,
285
- I'm skipping whatever remains of this entry
286
- Repeated entry---line 1451 of file ref.bib
287
- : @article{1703.02102
288
- : ,
289
- I'm skipping whatever remains of this entry
290
- Repeated entry---line 1467 of file ref.bib
291
- : @article{2209.01820
292
- : ,
293
- I'm skipping whatever remains of this entry
294
- Repeated entry---line 1483 of file ref.bib
295
- : @article{1811.09013
296
- : ,
297
- I'm skipping whatever remains of this entry
298
- Repeated entry---line 1515 of file ref.bib
299
- : @article{1512.07669
300
- : ,
301
- I'm skipping whatever remains of this entry
302
- Repeated entry---line 1533 of file ref.bib
303
- : @article{1511.02377
304
- : ,
305
- I'm skipping whatever remains of this entry
306
- Repeated entry---line 1549 of file ref.bib
307
- : @article{1512.09075
308
- : ,
309
- I'm skipping whatever remains of this entry
310
- Repeated entry---line 1565 of file ref.bib
311
- : @article{2008.10426
312
- : ,
313
- I'm skipping whatever remains of this entry
314
- Repeated entry---line 1581 of file ref.bib
315
- : @article{0711.2185
316
- : ,
317
- I'm skipping whatever remains of this entry
318
- Repeated entry---line 1599 of file ref.bib
319
- : @article{2303.08631
320
- : ,
321
- I'm skipping whatever remains of this entry
322
- Repeated entry---line 1615 of file ref.bib
323
- : @article{2106.14642
324
- : ,
325
- I'm skipping whatever remains of this entry
326
- Repeated entry---line 1649 of file ref.bib
327
- : @article{2012.01100
328
- : ,
329
- I'm skipping whatever remains of this entry
330
- Repeated entry---line 1665 of file ref.bib
331
- : @article{1703.02102
332
- : ,
333
- I'm skipping whatever remains of this entry
334
- Repeated entry---line 1681 of file ref.bib
335
- : @article{2209.01820
336
- : ,
337
- I'm skipping whatever remains of this entry
338
- Repeated entry---line 1697 of file ref.bib
339
- : @article{1811.09013
340
- : ,
341
- I'm skipping whatever remains of this entry
342
- Repeated entry---line 1713 of file ref.bib
343
- : @article{1911.04817
344
- : ,
345
- I'm skipping whatever remains of this entry
346
- Repeated entry---line 1745 of file ref.bib
347
- : @article{1512.07669
348
- : ,
349
- I'm skipping whatever remains of this entry
350
- Repeated entry---line 1763 of file ref.bib
351
- : @article{1511.02377
352
- : ,
353
- I'm skipping whatever remains of this entry
354
- Repeated entry---line 1779 of file ref.bib
355
- : @article{1512.09075
356
- : ,
357
- I'm skipping whatever remains of this entry
358
- Repeated entry---line 1795 of file ref.bib
359
- : @article{2008.10426
360
- : ,
361
- I'm skipping whatever remains of this entry
362
- Repeated entry---line 1811 of file ref.bib
363
- : @article{0711.2185
364
- : ,
365
- I'm skipping whatever remains of this entry
366
- Repeated entry---line 1829 of file ref.bib
367
- : @article{2303.08631
368
- : ,
369
- I'm skipping whatever remains of this entry
370
- Repeated entry---line 1845 of file ref.bib
371
- : @article{2106.14642
372
- : ,
373
- I'm skipping whatever remains of this entry
374
- Repeated entry---line 1879 of file ref.bib
375
- : @article{2012.01100
376
- : ,
377
- I'm skipping whatever remains of this entry
378
- Repeated entry---line 1895 of file ref.bib
379
- : @article{1703.02102
380
- : ,
381
- I'm skipping whatever remains of this entry
382
- Repeated entry---line 1911 of file ref.bib
383
- : @article{2209.01820
384
- : ,
385
- I'm skipping whatever remains of this entry
386
- Repeated entry---line 1927 of file ref.bib
387
- : @article{1811.09013
388
- : ,
389
- I'm skipping whatever remains of this entry
390
- Repeated entry---line 1943 of file ref.bib
391
- : @article{1911.04817
392
- : ,
393
- I'm skipping whatever remains of this entry
394
- Repeated entry---line 1959 of file ref.bib
395
- : @article{2108.11510
396
- : ,
397
- I'm skipping whatever remains of this entry
398
- Repeated entry---line 1993 of file ref.bib
399
- : @article{1512.07669
400
- : ,
401
- I'm skipping whatever remains of this entry
402
- Repeated entry---line 2011 of file ref.bib
403
- : @article{1511.02377
404
- : ,
405
- I'm skipping whatever remains of this entry
406
- Repeated entry---line 2027 of file ref.bib
407
- : @article{1512.09075
408
- : ,
409
- I'm skipping whatever remains of this entry
410
- Repeated entry---line 2043 of file ref.bib
411
- : @article{2008.10426
412
- : ,
413
- I'm skipping whatever remains of this entry
414
- Repeated entry---line 2059 of file ref.bib
415
- : @article{0711.2185
416
- : ,
417
- I'm skipping whatever remains of this entry
418
- Repeated entry---line 2077 of file ref.bib
419
- : @article{2303.08631
420
- : ,
421
- I'm skipping whatever remains of this entry
422
- Repeated entry---line 2093 of file ref.bib
423
- : @article{2106.14642
424
- : ,
425
- I'm skipping whatever remains of this entry
426
- Repeated entry---line 2127 of file ref.bib
427
- : @article{2012.01100
428
- : ,
429
- I'm skipping whatever remains of this entry
430
- Repeated entry---line 2143 of file ref.bib
431
- : @article{1703.02102
432
- : ,
433
- I'm skipping whatever remains of this entry
434
- Repeated entry---line 2159 of file ref.bib
435
- : @article{2209.01820
436
- : ,
437
- I'm skipping whatever remains of this entry
438
- Repeated entry---line 2175 of file ref.bib
439
- : @article{1811.09013
440
- : ,
441
- I'm skipping whatever remains of this entry
442
- Repeated entry---line 2191 of file ref.bib
443
- : @article{1911.04817
444
- : ,
445
- I'm skipping whatever remains of this entry
446
- Repeated entry---line 2207 of file ref.bib
447
- : @article{2108.11510
448
- : ,
449
- I'm skipping whatever remains of this entry
450
- Repeated entry---line 2223 of file ref.bib
451
- : @article{2212.00253
452
- : ,
453
- I'm skipping whatever remains of this entry
454
- Repeated entry---line 2257 of file ref.bib
455
- : @article{1512.07669
456
- : ,
457
- I'm skipping whatever remains of this entry
458
- Repeated entry---line 2275 of file ref.bib
459
- : @article{1511.02377
460
- : ,
461
- I'm skipping whatever remains of this entry
462
- Repeated entry---line 2291 of file ref.bib
463
- : @article{1512.09075
464
- : ,
465
- I'm skipping whatever remains of this entry
466
- Repeated entry---line 2307 of file ref.bib
467
- : @article{2008.10426
468
- : ,
469
- I'm skipping whatever remains of this entry
470
- Repeated entry---line 2323 of file ref.bib
471
- : @article{0711.2185
472
- : ,
473
- I'm skipping whatever remains of this entry
474
- Repeated entry---line 2341 of file ref.bib
475
- : @article{2303.08631
476
- : ,
477
- I'm skipping whatever remains of this entry
478
- Repeated entry---line 2357 of file ref.bib
479
- : @article{2106.14642
480
- : ,
481
- I'm skipping whatever remains of this entry
482
- Repeated entry---line 2391 of file ref.bib
483
- : @article{2012.01100
484
- : ,
485
- I'm skipping whatever remains of this entry
486
- Repeated entry---line 2407 of file ref.bib
487
- : @article{1703.02102
488
- : ,
489
- I'm skipping whatever remains of this entry
490
- Repeated entry---line 2423 of file ref.bib
491
- : @article{2209.01820
492
- : ,
493
- I'm skipping whatever remains of this entry
494
- Repeated entry---line 2439 of file ref.bib
495
- : @article{1811.09013
496
- : ,
497
- I'm skipping whatever remains of this entry
498
- Repeated entry---line 2455 of file ref.bib
499
- : @article{1911.04817
500
- : ,
501
- I'm skipping whatever remains of this entry
502
- Repeated entry---line 2471 of file ref.bib
503
- : @article{2108.11510
504
- : ,
505
- I'm skipping whatever remains of this entry
506
- Repeated entry---line 2487 of file ref.bib
507
- : @article{2212.00253
508
- : ,
509
- I'm skipping whatever remains of this entry
510
- Repeated entry---line 2505 of file ref.bib
511
- : @article{1709.05067
512
- : ,
513
- I'm skipping whatever remains of this entry
514
- Repeated entry---line 2537 of file ref.bib
515
- : @article{1512.07669
516
- : ,
517
- I'm skipping whatever remains of this entry
518
- Repeated entry---line 2555 of file ref.bib
519
- : @article{1511.02377
520
- : ,
521
- I'm skipping whatever remains of this entry
522
- Repeated entry---line 2571 of file ref.bib
523
- : @article{1512.09075
524
- : ,
525
- I'm skipping whatever remains of this entry
526
- Repeated entry---line 2587 of file ref.bib
527
- : @article{2008.10426
528
- : ,
529
- I'm skipping whatever remains of this entry
530
- Repeated entry---line 2603 of file ref.bib
531
- : @article{0711.2185
532
- : ,
533
- I'm skipping whatever remains of this entry
534
- Repeated entry---line 2621 of file ref.bib
535
- : @article{2303.08631
536
- : ,
537
- I'm skipping whatever remains of this entry
538
- Repeated entry---line 2637 of file ref.bib
539
- : @article{2106.14642
540
- : ,
541
- I'm skipping whatever remains of this entry
542
- Repeated entry---line 2671 of file ref.bib
543
- : @article{2012.01100
544
- : ,
545
- I'm skipping whatever remains of this entry
546
- Repeated entry---line 2687 of file ref.bib
547
- : @article{1703.02102
548
- : ,
549
- I'm skipping whatever remains of this entry
550
- Repeated entry---line 2703 of file ref.bib
551
- : @article{2209.01820
552
- : ,
553
- I'm skipping whatever remains of this entry
554
- Repeated entry---line 2719 of file ref.bib
555
- : @article{1811.09013
556
- : ,
557
- I'm skipping whatever remains of this entry
558
- Repeated entry---line 2735 of file ref.bib
559
- : @article{1911.04817
560
- : ,
561
- I'm skipping whatever remains of this entry
562
- Repeated entry---line 2751 of file ref.bib
563
- : @article{2108.11510
564
- : ,
565
- I'm skipping whatever remains of this entry
566
- Repeated entry---line 2767 of file ref.bib
567
- : @article{2212.00253
568
- : ,
569
- I'm skipping whatever remains of this entry
570
- Repeated entry---line 2785 of file ref.bib
571
- : @article{1709.05067
572
- : ,
573
- I'm skipping whatever remains of this entry
574
- Repeated entry---line 2801 of file ref.bib
575
- : @article{1708.05866
576
- : ,
577
- I'm skipping whatever remains of this entry
578
- Repeated entry---line 2833 of file ref.bib
579
- : @article{1512.07669
580
- : ,
581
- I'm skipping whatever remains of this entry
582
- Repeated entry---line 2851 of file ref.bib
583
- : @article{1511.02377
584
- : ,
585
- I'm skipping whatever remains of this entry
586
- Repeated entry---line 2867 of file ref.bib
587
- : @article{1512.09075
588
- : ,
589
- I'm skipping whatever remains of this entry
590
- Repeated entry---line 2883 of file ref.bib
591
- : @article{2008.10426
592
- : ,
593
- I'm skipping whatever remains of this entry
594
- Repeated entry---line 2899 of file ref.bib
595
- : @article{0711.2185
596
- : ,
597
- I'm skipping whatever remains of this entry
598
- Repeated entry---line 2917 of file ref.bib
599
- : @article{2303.08631
600
- : ,
601
- I'm skipping whatever remains of this entry
602
- Repeated entry---line 2933 of file ref.bib
603
- : @article{2106.14642
604
- : ,
605
- I'm skipping whatever remains of this entry
606
- Repeated entry---line 2967 of file ref.bib
607
- : @article{2012.01100
608
- : ,
609
- I'm skipping whatever remains of this entry
610
- Repeated entry---line 2983 of file ref.bib
611
- : @article{1703.02102
612
- : ,
613
- I'm skipping whatever remains of this entry
614
- Repeated entry---line 2999 of file ref.bib
615
- : @article{2209.01820
616
- : ,
617
- I'm skipping whatever remains of this entry
618
- Repeated entry---line 3015 of file ref.bib
619
- : @article{1811.09013
620
- : ,
621
- I'm skipping whatever remains of this entry
622
- Repeated entry---line 3031 of file ref.bib
623
- : @article{1911.04817
624
- : ,
625
- I'm skipping whatever remains of this entry
626
- Repeated entry---line 3047 of file ref.bib
627
- : @article{2108.11510
628
- : ,
629
- I'm skipping whatever remains of this entry
630
- Repeated entry---line 3063 of file ref.bib
631
- : @article{2212.00253
632
- : ,
633
- I'm skipping whatever remains of this entry
634
- Repeated entry---line 3081 of file ref.bib
635
- : @article{1709.05067
636
- : ,
637
- I'm skipping whatever remains of this entry
638
- Repeated entry---line 3097 of file ref.bib
639
- : @article{1708.05866
640
- : ,
641
- I'm skipping whatever remains of this entry
642
- Repeated entry---line 3113 of file ref.bib
643
- : @article{1906.10025
644
- : ,
645
- I'm skipping whatever remains of this entry
646
- Repeated entry---line 3145 of file ref.bib
647
- : @article{1512.07669
648
- : ,
649
- I'm skipping whatever remains of this entry
650
- Repeated entry---line 3163 of file ref.bib
651
- : @article{1511.02377
652
- : ,
653
- I'm skipping whatever remains of this entry
654
- Repeated entry---line 3179 of file ref.bib
655
- : @article{1512.09075
656
- : ,
657
- I'm skipping whatever remains of this entry
658
- Repeated entry---line 3195 of file ref.bib
659
- : @article{2008.10426
660
- : ,
661
- I'm skipping whatever remains of this entry
662
- Repeated entry---line 3211 of file ref.bib
663
- : @article{0711.2185
664
- : ,
665
- I'm skipping whatever remains of this entry
666
- Repeated entry---line 3229 of file ref.bib
667
- : @article{2303.08631
668
- : ,
669
- I'm skipping whatever remains of this entry
670
- Repeated entry---line 3245 of file ref.bib
671
- : @article{2106.14642
672
- : ,
673
- I'm skipping whatever remains of this entry
674
- Repeated entry---line 3279 of file ref.bib
675
- : @article{2012.01100
676
- : ,
677
- I'm skipping whatever remains of this entry
678
- Repeated entry---line 3295 of file ref.bib
679
- : @article{1703.02102
680
- : ,
681
- I'm skipping whatever remains of this entry
682
- Repeated entry---line 3311 of file ref.bib
683
- : @article{2209.01820
684
- : ,
685
- I'm skipping whatever remains of this entry
686
- Repeated entry---line 3327 of file ref.bib
687
- : @article{1811.09013
688
- : ,
689
- I'm skipping whatever remains of this entry
690
- Repeated entry---line 3343 of file ref.bib
691
- : @article{1911.04817
692
- : ,
693
- I'm skipping whatever remains of this entry
694
- Repeated entry---line 3359 of file ref.bib
695
- : @article{2108.11510
696
- : ,
697
- I'm skipping whatever remains of this entry
698
- Repeated entry---line 3375 of file ref.bib
699
- : @article{2212.00253
700
- : ,
701
- I'm skipping whatever remains of this entry
702
- Repeated entry---line 3393 of file ref.bib
703
- : @article{1709.05067
704
- : ,
705
- I'm skipping whatever remains of this entry
706
- Repeated entry---line 3409 of file ref.bib
707
- : @article{1708.05866
708
- : ,
709
- I'm skipping whatever remains of this entry
710
- Repeated entry---line 3425 of file ref.bib
711
- : @article{1906.10025
712
- : ,
713
- I'm skipping whatever remains of this entry
714
- Repeated entry---line 3441 of file ref.bib
715
- : @article{2111.01334
716
- : ,
717
- I'm skipping whatever remains of this entry
718
- Repeated entry---line 3473 of file ref.bib
719
- : @article{1512.07669
720
- : ,
721
- I'm skipping whatever remains of this entry
722
- Repeated entry---line 3491 of file ref.bib
723
- : @article{1511.02377
724
- : ,
725
- I'm skipping whatever remains of this entry
726
- Repeated entry---line 3507 of file ref.bib
727
- : @article{1512.09075
728
- : ,
729
- I'm skipping whatever remains of this entry
730
- Repeated entry---line 3523 of file ref.bib
731
- : @article{2008.10426
732
- : ,
733
- I'm skipping whatever remains of this entry
734
- Repeated entry---line 3539 of file ref.bib
735
- : @article{0711.2185
736
- : ,
737
- I'm skipping whatever remains of this entry
738
- Repeated entry---line 3557 of file ref.bib
739
- : @article{2303.08631
740
- : ,
741
- I'm skipping whatever remains of this entry
742
- Repeated entry---line 3573 of file ref.bib
743
- : @article{2106.14642
744
- : ,
745
- I'm skipping whatever remains of this entry
746
- Repeated entry---line 3607 of file ref.bib
747
- : @article{2012.01100
748
- : ,
749
- I'm skipping whatever remains of this entry
750
- Repeated entry---line 3623 of file ref.bib
751
- : @article{1703.02102
752
- : ,
753
- I'm skipping whatever remains of this entry
754
- Repeated entry---line 3639 of file ref.bib
755
- : @article{2209.01820
756
- : ,
757
- I'm skipping whatever remains of this entry
758
- Repeated entry---line 3655 of file ref.bib
759
- : @article{1811.09013
760
- : ,
761
- I'm skipping whatever remains of this entry
762
- Repeated entry---line 3671 of file ref.bib
763
- : @article{1911.04817
764
- : ,
765
- I'm skipping whatever remains of this entry
766
- Repeated entry---line 3687 of file ref.bib
767
- : @article{2108.11510
768
- : ,
769
- I'm skipping whatever remains of this entry
770
- Repeated entry---line 3703 of file ref.bib
771
- : @article{2212.00253
772
- : ,
773
- I'm skipping whatever remains of this entry
774
- Repeated entry---line 3721 of file ref.bib
775
- : @article{1709.05067
776
- : ,
777
- I'm skipping whatever remains of this entry
778
- Repeated entry---line 3737 of file ref.bib
779
- : @article{1708.05866
780
- : ,
781
- I'm skipping whatever remains of this entry
782
- Repeated entry---line 3753 of file ref.bib
783
- : @article{1906.10025
784
- : ,
785
- I'm skipping whatever remains of this entry
786
- Repeated entry---line 3769 of file ref.bib
787
- : @article{2111.01334
788
- : ,
789
- I'm skipping whatever remains of this entry
790
- Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510
791
- while executing---line 2701 of file iclr2022_conference.bst
792
- Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510
793
- while executing---line 2701 of file iclr2022_conference.bst
794
- Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510
795
- while executing---line 2701 of file iclr2022_conference.bst
796
- Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510
797
- while executing---line 2701 of file iclr2022_conference.bst
798
- Too many commas in name 1 of "Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath" for entry 1708.05866
799
- while executing---line 2701 of file iclr2022_conference.bst
800
- Too many commas in name 1 of "Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath" for entry 1708.05866
801
- while executing---line 2701 of file iclr2022_conference.bst
802
- Too many commas in name 1 of "Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad" for entry 2106.14642
803
- while executing---line 2701 of file iclr2022_conference.bst
804
- Too many commas in name 1 of "Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad" for entry 2106.14642
805
- while executing---line 2701 of file iclr2022_conference.bst
806
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
807
- while executing---line 2701 of file iclr2022_conference.bst
808
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
809
- while executing---line 2701 of file iclr2022_conference.bst
810
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
811
- while executing---line 2701 of file iclr2022_conference.bst
812
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
813
- while executing---line 2701 of file iclr2022_conference.bst
814
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
815
- while executing---line 2701 of file iclr2022_conference.bst
816
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
817
- while executing---line 2701 of file iclr2022_conference.bst
818
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
819
- while executing---line 2701 of file iclr2022_conference.bst
820
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
821
- while executing---line 2701 of file iclr2022_conference.bst
822
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
823
- while executing---line 2701 of file iclr2022_conference.bst
824
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
825
- while executing---line 2701 of file iclr2022_conference.bst
826
- Too many commas in name 1 of "Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier" for entry 2008.10426
827
- while executing---line 2701 of file iclr2022_conference.bst
828
- Too many commas in name 1 of "Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier" for entry 2008.10426
829
- while executing---line 2701 of file iclr2022_conference.bst
830
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
831
- while executing---line 2701 of file iclr2022_conference.bst
832
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
833
- while executing---line 2701 of file iclr2022_conference.bst
834
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
835
- while executing---line 2701 of file iclr2022_conference.bst
836
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
837
- while executing---line 2701 of file iclr2022_conference.bst
838
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
839
- while executing---line 2701 of file iclr2022_conference.bst
840
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
841
- while executing---line 2701 of file iclr2022_conference.bst
842
- Too many commas in name 1 of "Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath" for entry 1708.05866
843
- while executing---line 2865 of file iclr2022_conference.bst
844
- Too many commas in name 1 of "Kai Arulkumaran , Marc Peter Deisenroth , Miles Brundage , Anil Anthony Bharath" for entry 1708.05866
845
- while executing---line 2865 of file iclr2022_conference.bst
846
- Too many commas in name 1 of "Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad" for entry 2106.14642
847
- while executing---line 2865 of file iclr2022_conference.bst
848
- Too many commas in name 1 of "Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad" for entry 2106.14642
849
- while executing---line 2865 of file iclr2022_conference.bst
850
- Too many commas in name 1 of "Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier" for entry 2008.10426
851
- while executing---line 2865 of file iclr2022_conference.bst
852
- Too many commas in name 1 of "Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier" for entry 2008.10426
853
- while executing---line 2865 of file iclr2022_conference.bst
854
- Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510
855
- while executing---line 2865 of file iclr2022_conference.bst
856
- Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510
857
- while executing---line 2865 of file iclr2022_conference.bst
858
- Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510
859
- while executing---line 2865 of file iclr2022_conference.bst
860
- Too many commas in name 1 of "Ngan Le , Vidhiwar Singh Rathour , Kashu Yamazaki , Khoa Luu , Marios Savvides" for entry 2108.11510
861
- while executing---line 2865 of file iclr2022_conference.bst
862
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
863
- while executing---line 2865 of file iclr2022_conference.bst
864
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
865
- while executing---line 2865 of file iclr2022_conference.bst
866
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
867
- while executing---line 2865 of file iclr2022_conference.bst
868
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
869
- while executing---line 2865 of file iclr2022_conference.bst
870
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
871
- while executing---line 2865 of file iclr2022_conference.bst
872
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
873
- while executing---line 2865 of file iclr2022_conference.bst
874
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
875
- while executing---line 2865 of file iclr2022_conference.bst
876
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
877
- while executing---line 2865 of file iclr2022_conference.bst
878
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
879
- while executing---line 2865 of file iclr2022_conference.bst
880
- Too many commas in name 1 of "Qiyue Yin , Tongtong Yu , Shengqi Shen , Jun Yang , Meijing Zhao , Kaiqi Huang , Bin Liang , Liang Wang" for entry 2212.00253
881
- while executing---line 2865 of file iclr2022_conference.bst
882
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
883
- while executing---line 2865 of file iclr2022_conference.bst
884
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
885
- while executing---line 2865 of file iclr2022_conference.bst
886
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
887
- while executing---line 2865 of file iclr2022_conference.bst
888
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
889
- while executing---line 2865 of file iclr2022_conference.bst
890
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
891
- while executing---line 2865 of file iclr2022_conference.bst
892
- Too many commas in name 1 of "Xiu-Xiu Zhan , Chuang Liu , Zhipeng Wang , Huijuang Wang , Petter Holme , Zi-Ke Zhang" for entry 2111.01334
893
- while executing---line 2865 of file iclr2022_conference.bst
894
- You've used 18 entries,
895
- 2773 wiz_defined-function locations,
896
- 692 strings with 8040 characters,
897
- and the built_in function-call counts, 5649 in all, are:
898
- = -- 522
899
- > -- 180
900
- < -- 18
901
- + -- 72
902
- - -- 54
903
- * -- 308
904
- := -- 946
905
- add.period$ -- 72
906
- call.type$ -- 18
907
- change.case$ -- 72
908
- chr.to.int$ -- 18
909
- cite$ -- 36
910
- duplicate$ -- 342
911
- empty$ -- 541
912
- format.name$ -- 72
913
- if$ -- 1171
914
- int.to.chr$ -- 1
915
- int.to.str$ -- 1
916
- missing$ -- 18
917
- newline$ -- 116
918
- num.names$ -- 72
919
- pop$ -- 144
920
- preamble$ -- 1
921
- purify$ -- 54
922
- quote$ -- 0
923
- skip$ -- 235
924
- stack$ -- 0
925
- substring$ -- 36
926
- swap$ -- 18
927
- text.length$ -- 0
928
- text.prefix$ -- 0
929
- top$ -- 0
930
- type$ -- 198
931
- warning$ -- 0
932
- while$ -- 54
933
- width$ -- 0
934
- write$ -- 259
935
- (There were 248 error messages)
outputs/outputs_20230421_000752/main.log DELETED
@@ -1,460 +0,0 @@
1
- This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/W32TeX) (preloaded format=pdflatex 2020.3.10) 21 APR 2023 00:35
2
- entering extended mode
3
- restricted \write18 enabled.
4
- %&-line parsing enabled.
5
- **main.tex
6
- (./main.tex
7
- LaTeX2e <2020-02-02> patch level 5
8
- L3 programming layer <2020-02-25>
9
- (c:/texlive/2019/texmf-dist/tex/latex/base/article.cls
10
- Document Class: article 2019/12/20 v1.4l Standard LaTeX document class
11
- (c:/texlive/2019/texmf-dist/tex/latex/base/size10.clo
12
- File: size10.clo 2019/12/20 v1.4l Standard LaTeX file (size option)
13
- )
14
- \c@part=\count167
15
- \c@section=\count168
16
- \c@subsection=\count169
17
- \c@subsubsection=\count170
18
- \c@paragraph=\count171
19
- \c@subparagraph=\count172
20
- \c@figure=\count173
21
- \c@table=\count174
22
- \abovecaptionskip=\skip47
23
- \belowcaptionskip=\skip48
24
- \bibindent=\dimen134
25
- )
26
- (c:/texlive/2019/texmf-dist/tex/latex/graphics/graphicx.sty
27
- Package: graphicx 2019/11/30 v1.2a Enhanced LaTeX Graphics (DPC,SPQR)
28
-
29
- (c:/texlive/2019/texmf-dist/tex/latex/graphics/keyval.sty
30
- Package: keyval 2014/10/28 v1.15 key=value parser (DPC)
31
- \KV@toks@=\toks15
32
- )
33
- (c:/texlive/2019/texmf-dist/tex/latex/graphics/graphics.sty
34
- Package: graphics 2019/11/30 v1.4a Standard LaTeX Graphics (DPC,SPQR)
35
-
36
- (c:/texlive/2019/texmf-dist/tex/latex/graphics/trig.sty
37
- Package: trig 2016/01/03 v1.10 sin cos tan (DPC)
38
- )
39
- (c:/texlive/2019/texmf-dist/tex/latex/graphics-cfg/graphics.cfg
40
- File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration
41
- )
42
- Package graphics Info: Driver file: pdftex.def on input line 105.
43
-
44
- (c:/texlive/2019/texmf-dist/tex/latex/graphics-def/pdftex.def
45
- File: pdftex.def 2018/01/08 v1.0l Graphics/color driver for pdftex
46
- ))
47
- \Gin@req@height=\dimen135
48
- \Gin@req@width=\dimen136
49
- )
50
- (c:/texlive/2019/texmf-dist/tex/latex/booktabs/booktabs.sty
51
- Package: booktabs 2020/01/12 v1.61803398 Publication quality tables
52
- \heavyrulewidth=\dimen137
53
- \lightrulewidth=\dimen138
54
- \cmidrulewidth=\dimen139
55
- \belowrulesep=\dimen140
56
- \belowbottomsep=\dimen141
57
- \aboverulesep=\dimen142
58
- \abovetopsep=\dimen143
59
- \cmidrulesep=\dimen144
60
- \cmidrulekern=\dimen145
61
- \defaultaddspace=\dimen146
62
- \@cmidla=\count175
63
- \@cmidlb=\count176
64
- \@aboverulesep=\dimen147
65
- \@belowrulesep=\dimen148
66
- \@thisruleclass=\count177
67
- \@lastruleclass=\count178
68
- \@thisrulewidth=\dimen149
69
- )
70
- (./iclr2022_conference.sty
71
- (c:/texlive/2019/texmf-dist/tex/latex/eso-pic/eso-pic.sty
72
- Package: eso-pic 2018/04/12 v2.0h eso-pic (RN)
73
-
74
- (c:/texlive/2019/texmf-dist/tex/generic/atbegshi/atbegshi.sty
75
- Package: atbegshi 2019/12/05 v1.19 At begin shipout hook (HO)
76
-
77
- (c:/texlive/2019/texmf-dist/tex/generic/infwarerr/infwarerr.sty
78
- Package: infwarerr 2019/12/03 v1.5 Providing info/warning/error messages (HO)
79
- )
80
- (c:/texlive/2019/texmf-dist/tex/generic/ltxcmds/ltxcmds.sty
81
- Package: ltxcmds 2019/12/15 v1.24 LaTeX kernel commands for general use (HO)
82
- )
83
- (c:/texlive/2019/texmf-dist/tex/generic/iftex/iftex.sty
84
- Package: iftex 2019/11/07 v1.0c TeX engine tests
85
- ))
86
- (c:/texlive/2019/texmf-dist/tex/latex/xcolor/xcolor.sty
87
- Package: xcolor 2016/05/11 v2.12 LaTeX color extensions (UK)
88
-
89
- (c:/texlive/2019/texmf-dist/tex/latex/graphics-cfg/color.cfg
90
- File: color.cfg 2016/01/02 v1.6 sample color configuration
91
- )
92
- Package xcolor Info: Driver file: pdftex.def on input line 225.
93
- Package xcolor Info: Model `cmy' substituted by `cmy0' on input line 1348.
94
- Package xcolor Info: Model `hsb' substituted by `rgb' on input line 1352.
95
- Package xcolor Info: Model `RGB' extended on input line 1364.
96
- Package xcolor Info: Model `HTML' substituted by `rgb' on input line 1366.
97
- Package xcolor Info: Model `Hsb' substituted by `hsb' on input line 1367.
98
- Package xcolor Info: Model `tHsb' substituted by `hsb' on input line 1368.
99
- Package xcolor Info: Model `HSB' substituted by `hsb' on input line 1369.
100
- Package xcolor Info: Model `Gray' substituted by `gray' on input line 1370.
101
- Package xcolor Info: Model `wave' substituted by `hsb' on input line 1371.
102
- )) (./fancyhdr.sty
103
- \fancy@headwidth=\skip49
104
- \f@ncyO@elh=\skip50
105
- \f@ncyO@erh=\skip51
106
- \f@ncyO@olh=\skip52
107
- \f@ncyO@orh=\skip53
108
- \f@ncyO@elf=\skip54
109
- \f@ncyO@erf=\skip55
110
- \f@ncyO@olf=\skip56
111
- \f@ncyO@orf=\skip57
112
- ) (./natbib.sty
113
- Package: natbib 2009/07/16 8.31 (PWD, AO)
114
- \bibhang=\skip58
115
- \bibsep=\skip59
116
- LaTeX Info: Redefining \cite on input line 694.
117
- \c@NAT@ctr=\count179
118
- )) (c:/texlive/2019/texmf-dist/tex/latex/psnfss/times.sty
119
- Package: times 2005/04/12 PSNFSS-v9.2a (SPQR)
120
- )
121
- (./math_commands.tex (c:/texlive/2019/texmf-dist/tex/latex/amsmath/amsmath.sty
122
- Package: amsmath 2020/01/20 v2.17e AMS math features
123
- \@mathmargin=\skip60
124
-
125
- For additional information on amsmath, use the `?' option.
126
- (c:/texlive/2019/texmf-dist/tex/latex/amsmath/amstext.sty
127
- Package: amstext 2000/06/29 v2.01 AMS text
128
-
129
- (c:/texlive/2019/texmf-dist/tex/latex/amsmath/amsgen.sty
130
- File: amsgen.sty 1999/11/30 v2.0 generic functions
131
- \@emptytoks=\toks16
132
- \ex@=\dimen150
133
- ))
134
- (c:/texlive/2019/texmf-dist/tex/latex/amsmath/amsbsy.sty
135
- Package: amsbsy 1999/11/29 v1.2d Bold Symbols
136
- \pmbraise@=\dimen151
137
- )
138
- (c:/texlive/2019/texmf-dist/tex/latex/amsmath/amsopn.sty
139
- Package: amsopn 2016/03/08 v2.02 operator names
140
- )
141
- \inf@bad=\count180
142
- LaTeX Info: Redefining \frac on input line 227.
143
- \uproot@=\count181
144
- \leftroot@=\count182
145
- LaTeX Info: Redefining \overline on input line 389.
146
- \classnum@=\count183
147
- \DOTSCASE@=\count184
148
- LaTeX Info: Redefining \ldots on input line 486.
149
- LaTeX Info: Redefining \dots on input line 489.
150
- LaTeX Info: Redefining \cdots on input line 610.
151
- \Mathstrutbox@=\box45
152
- \strutbox@=\box46
153
- \big@size=\dimen152
154
- LaTeX Font Info: Redeclaring font encoding OML on input line 733.
155
- LaTeX Font Info: Redeclaring font encoding OMS on input line 734.
156
- \macc@depth=\count185
157
- \c@MaxMatrixCols=\count186
158
- \dotsspace@=\muskip16
159
- \c@parentequation=\count187
160
- \dspbrk@lvl=\count188
161
- \tag@help=\toks17
162
- \row@=\count189
163
- \column@=\count190
164
- \maxfields@=\count191
165
- \andhelp@=\toks18
166
- \eqnshift@=\dimen153
167
- \alignsep@=\dimen154
168
- \tagshift@=\dimen155
169
- \tagwidth@=\dimen156
170
- \totwidth@=\dimen157
171
- \lineht@=\dimen158
172
- \@envbody=\toks19
173
- \multlinegap=\skip61
174
- \multlinetaggap=\skip62
175
- \mathdisplay@stack=\toks20
176
- LaTeX Info: Redefining \[ on input line 2859.
177
- LaTeX Info: Redefining \] on input line 2860.
178
- )
179
- (c:/texlive/2019/texmf-dist/tex/latex/amsfonts/amsfonts.sty
180
- Package: amsfonts 2013/01/14 v3.01 Basic AMSFonts support
181
- \symAMSa=\mathgroup4
182
- \symAMSb=\mathgroup5
183
- LaTeX Font Info: Redeclaring math symbol \hbar on input line 98.
184
- LaTeX Font Info: Overwriting math alphabet `\mathfrak' in version `bold'
185
- (Font) U/euf/m/n --> U/euf/b/n on input line 106.
186
- )
187
- (c:/texlive/2019/texmf-dist/tex/latex/tools/bm.sty
188
- Package: bm 2019/07/24 v1.2d Bold Symbol Support (DPC/FMi)
189
- \symboldoperators=\mathgroup6
190
- \symboldletters=\mathgroup7
191
- \symboldsymbols=\mathgroup8
192
- LaTeX Font Info: Redeclaring math alphabet \mathbf on input line 141.
193
- LaTeX Info: Redefining \bm on input line 209.
194
- )
195
- LaTeX Font Info: Overwriting math alphabet `\mathsfit' in version `bold'
196
- (Font) OT1/phv/m/sl --> OT1/phv/bx/n on input line 314.
197
- )
198
- (c:/texlive/2019/texmf-dist/tex/latex/hyperref/hyperref.sty
199
- Package: hyperref 2020/01/14 v7.00d Hypertext links for LaTeX
200
-
201
- (c:/texlive/2019/texmf-dist/tex/latex/pdftexcmds/pdftexcmds.sty
202
- Package: pdftexcmds 2019/11/24 v0.31 Utility functions of pdfTeX for LuaTeX (HO
203
- )
204
- Package pdftexcmds Info: \pdf@primitive is available.
205
- Package pdftexcmds Info: \pdf@ifprimitive is available.
206
- Package pdftexcmds Info: \pdfdraftmode found.
207
- )
208
- (c:/texlive/2019/texmf-dist/tex/generic/kvsetkeys/kvsetkeys.sty
209
- Package: kvsetkeys 2019/12/15 v1.18 Key value parser (HO)
210
- )
211
- (c:/texlive/2019/texmf-dist/tex/generic/kvdefinekeys/kvdefinekeys.sty
212
- Package: kvdefinekeys 2019-12-19 v1.6 Define keys (HO)
213
- )
214
- (c:/texlive/2019/texmf-dist/tex/generic/pdfescape/pdfescape.sty
215
- Package: pdfescape 2019/12/09 v1.15 Implements pdfTeX's escape features (HO)
216
- )
217
- (c:/texlive/2019/texmf-dist/tex/latex/hycolor/hycolor.sty
218
- Package: hycolor 2020-01-27 v1.10 Color options for hyperref/bookmark (HO)
219
- )
220
- (c:/texlive/2019/texmf-dist/tex/latex/letltxmacro/letltxmacro.sty
221
- Package: letltxmacro 2019/12/03 v1.6 Let assignment for LaTeX macros (HO)
222
- )
223
- (c:/texlive/2019/texmf-dist/tex/latex/auxhook/auxhook.sty
224
- Package: auxhook 2019-12-17 v1.6 Hooks for auxiliary files (HO)
225
- )
226
- (c:/texlive/2019/texmf-dist/tex/latex/kvoptions/kvoptions.sty
227
- Package: kvoptions 2019/11/29 v3.13 Key value format for package options (HO)
228
- )
229
- \@linkdim=\dimen159
230
- \Hy@linkcounter=\count192
231
- \Hy@pagecounter=\count193
232
-
233
- (c:/texlive/2019/texmf-dist/tex/latex/hyperref/pd1enc.def
234
- File: pd1enc.def 2020/01/14 v7.00d Hyperref: PDFDocEncoding definition (HO)
235
- )
236
- (c:/texlive/2019/texmf-dist/tex/generic/intcalc/intcalc.sty
237
- Package: intcalc 2019/12/15 v1.3 Expandable calculations with integers (HO)
238
- )
239
- (c:/texlive/2019/texmf-dist/tex/generic/etexcmds/etexcmds.sty
240
- Package: etexcmds 2019/12/15 v1.7 Avoid name clashes with e-TeX commands (HO)
241
- )
242
- \Hy@SavedSpaceFactor=\count194
243
- \pdfmajorversion=\count195
244
- Package hyperref Info: Hyper figures OFF on input line 4547.
245
- Package hyperref Info: Link nesting OFF on input line 4552.
246
- Package hyperref Info: Hyper index ON on input line 4555.
247
- Package hyperref Info: Plain pages OFF on input line 4562.
248
- Package hyperref Info: Backreferencing OFF on input line 4567.
249
- Package hyperref Info: Implicit mode ON; LaTeX internals redefined.
250
- Package hyperref Info: Bookmarks ON on input line 4800.
251
- \c@Hy@tempcnt=\count196
252
-
253
- (c:/texlive/2019/texmf-dist/tex/latex/url/url.sty
254
- \Urlmuskip=\muskip17
255
- Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc.
256
- )
257
- LaTeX Info: Redefining \url on input line 5159.
258
- \XeTeXLinkMargin=\dimen160
259
-
260
- (c:/texlive/2019/texmf-dist/tex/generic/bitset/bitset.sty
261
- Package: bitset 2019/12/09 v1.3 Handle bit-vector datatype (HO)
262
-
263
- (c:/texlive/2019/texmf-dist/tex/generic/bigintcalc/bigintcalc.sty
264
- Package: bigintcalc 2019/12/15 v1.5 Expandable calculations on big integers (HO
265
- )
266
- ))
267
- \Fld@menulength=\count197
268
- \Field@Width=\dimen161
269
- \Fld@charsize=\dimen162
270
- Package hyperref Info: Hyper figures OFF on input line 6430.
271
- Package hyperref Info: Link nesting OFF on input line 6435.
272
- Package hyperref Info: Hyper index ON on input line 6438.
273
- Package hyperref Info: backreferencing OFF on input line 6445.
274
- Package hyperref Info: Link coloring OFF on input line 6450.
275
- Package hyperref Info: Link coloring with OCG OFF on input line 6455.
276
- Package hyperref Info: PDF/A mode OFF on input line 6460.
277
- LaTeX Info: Redefining \ref on input line 6500.
278
- LaTeX Info: Redefining \pageref on input line 6504.
279
- \Hy@abspage=\count198
280
- \c@Item=\count199
281
- \c@Hfootnote=\count266
282
- )
283
- Package hyperref Info: Driver (autodetected): hpdftex.
284
-
285
- (c:/texlive/2019/texmf-dist/tex/latex/hyperref/hpdftex.def
286
- File: hpdftex.def 2020/01/14 v7.00d Hyperref driver for pdfTeX
287
-
288
- (c:/texlive/2019/texmf-dist/tex/latex/atveryend/atveryend.sty
289
- Package: atveryend 2019-12-11 v1.11 Hooks at the very end of document (HO)
290
- Package atveryend Info: \enddocument detected (standard20110627).
291
- )
292
- \Fld@listcount=\count267
293
- \c@bookmark@seq@number=\count268
294
-
295
- (c:/texlive/2019/texmf-dist/tex/latex/rerunfilecheck/rerunfilecheck.sty
296
- Package: rerunfilecheck 2019/12/05 v1.9 Rerun checks for auxiliary files (HO)
297
-
298
- (c:/texlive/2019/texmf-dist/tex/generic/uniquecounter/uniquecounter.sty
299
- Package: uniquecounter 2019/12/15 v1.4 Provide unlimited unique counter (HO)
300
- )
301
- Package uniquecounter Info: New unique counter `rerunfilecheck' on input line 2
302
- 86.
303
- )
304
- \Hy@SectionHShift=\skip63
305
- )
306
- (c:/texlive/2019/texmf-dist/tex/latex/algorithmicx/algorithmicx.sty
307
- Package: algorithmicx 2005/04/27 v1.2 Algorithmicx
308
-
309
- (c:/texlive/2019/texmf-dist/tex/latex/base/ifthen.sty
310
- Package: ifthen 2014/09/29 v1.1c Standard LaTeX ifthen package (DPC)
311
- )
312
- Document Style algorithmicx 1.2 - a greatly improved `algorithmic' style
313
- \c@ALG@line=\count269
314
- \c@ALG@rem=\count270
315
- \c@ALG@nested=\count271
316
- \ALG@tlm=\skip64
317
- \ALG@thistlm=\skip65
318
- \c@ALG@Lnr=\count272
319
- \c@ALG@blocknr=\count273
320
- \c@ALG@storecount=\count274
321
- \c@ALG@tmpcounter=\count275
322
- \ALG@tmplength=\skip66
323
- ) (c:/texlive/2019/texmf-dist/tex/latex/l3backend/l3backend-pdfmode.def
324
- File: l3backend-pdfmode.def 2020-02-23 L3 backend support: PDF mode
325
- \l__kernel_color_stack_int=\count276
326
- \l__pdf_internal_box=\box47
327
- )
328
- (./main.aux)
329
- \openout1 = `main.aux'.
330
-
331
- LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 17.
332
- LaTeX Font Info: ... okay on input line 17.
333
- LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 17.
334
- LaTeX Font Info: ... okay on input line 17.
335
- LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 17.
336
- LaTeX Font Info: ... okay on input line 17.
337
- LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 17.
338
- LaTeX Font Info: ... okay on input line 17.
339
- LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 17.
340
- LaTeX Font Info: ... okay on input line 17.
341
- LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 17.
342
- LaTeX Font Info: ... okay on input line 17.
343
- LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 17.
344
- LaTeX Font Info: ... okay on input line 17.
345
- LaTeX Font Info: Checking defaults for PD1/pdf/m/n on input line 17.
346
- LaTeX Font Info: ... okay on input line 17.
347
- LaTeX Font Info: Trying to load font information for OT1+ptm on input line 1
348
- 7.
349
- (c:/texlive/2019/texmf-dist/tex/latex/psnfss/ot1ptm.fd
350
- File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm.
351
- )
352
- (c:/texlive/2019/texmf-dist/tex/context/base/mkii/supp-pdf.mkii
353
- [Loading MPS to PDF converter (version 2006.09.02).]
354
- \scratchcounter=\count277
355
- \scratchdimen=\dimen163
356
- \scratchbox=\box48
357
- \nofMPsegments=\count278
358
- \nofMParguments=\count279
359
- \everyMPshowfont=\toks21
360
- \MPscratchCnt=\count280
361
- \MPscratchDim=\dimen164
362
- \MPnumerator=\count281
363
- \makeMPintoPDFobject=\count282
364
- \everyMPtoPDFconversion=\toks22
365
- ) (c:/texlive/2019/texmf-dist/tex/latex/epstopdf-pkg/epstopdf-base.sty
366
- Package: epstopdf-base 2020-01-24 v2.11 Base part for package epstopdf
367
- Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4
368
- 85.
369
-
370
- (c:/texlive/2019/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg
371
- File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv
372
- e
373
- ))
374
- \AtBeginShipoutBox=\box49
375
- Package hyperref Info: Link coloring OFF on input line 17.
376
-
377
- (c:/texlive/2019/texmf-dist/tex/latex/hyperref/nameref.sty
378
- Package: nameref 2019/09/16 v2.46 Cross-referencing by name of section
379
-
380
- (c:/texlive/2019/texmf-dist/tex/latex/refcount/refcount.sty
381
- Package: refcount 2019/12/15 v3.6 Data extraction from label references (HO)
382
- )
383
- (c:/texlive/2019/texmf-dist/tex/generic/gettitlestring/gettitlestring.sty
384
- Package: gettitlestring 2019/12/15 v1.6 Cleanup title references (HO)
385
- )
386
- \c@section@level=\count283
387
- )
388
- LaTeX Info: Redefining \ref on input line 17.
389
- LaTeX Info: Redefining \pageref on input line 17.
390
- LaTeX Info: Redefining \nameref on input line 17.
391
-
392
- (./main.out) (./main.out)
393
- \@outlinefile=\write3
394
- \openout3 = `main.out'.
395
-
396
- LaTeX Font Info: Trying to load font information for U+msa on input line 19.
397
-
398
-
399
- (c:/texlive/2019/texmf-dist/tex/latex/amsfonts/umsa.fd
400
- File: umsa.fd 2013/01/14 v3.01 AMS symbols A
401
- )
402
- LaTeX Font Info: Trying to load font information for U+msb on input line 19.
403
-
404
-
405
- (c:/texlive/2019/texmf-dist/tex/latex/amsfonts/umsb.fd
406
- File: umsb.fd 2013/01/14 v3.01 AMS symbols B
407
- ) (./abstract.tex)
408
- (./introduction.tex
409
- Missing character: There is no � in font ptmr7t!
410
- Missing character: There is no � in font ptmr7t!
411
- ) (./related works.tex [1{c:/texlive/2019/texmf-var/fonts/map/pdftex/updmap/pdf
412
- tex.map}
413
-
414
- ]) (./backgrounds.tex [2]) (./methodology.tex)
415
- (./experiments.tex) (./conclusion.tex) (./main.bbl
416
- LaTeX Font Info: Trying to load font information for OT1+pcr on input line 1
417
- 3.
418
-
419
- (c:/texlive/2019/texmf-dist/tex/latex/psnfss/ot1pcr.fd
420
- File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr.
421
- ) [3]
422
- Missing character: There is no � in font ptmr7t!
423
- Missing character: There is no � in font ptmr7t!
424
- )
425
- Package atveryend Info: Empty hook `BeforeClearDocument' on input line 34.
426
- [4]
427
- Package atveryend Info: Empty hook `AfterLastShipout' on input line 34.
428
- (./main.aux)
429
- Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 34.
430
- Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 34.
431
- Package rerunfilecheck Info: File `main.out' has not changed.
432
- (rerunfilecheck) Checksum: 318D5997BC6EC56C0035AD484222C237;470.
433
- Package atveryend Info: Empty hook `AtVeryVeryEnd' on input line 34.
434
- )
435
- Here is how much of TeX's memory you used:
436
- 7981 strings out of 480994
437
- 109943 string characters out of 5916032
438
- 390206 words of memory out of 5000000
439
- 23274 multiletter control sequences out of 15000+600000
440
- 551097 words of font info for 60 fonts, out of 8000000 for 9000
441
- 1141 hyphenation exceptions out of 8191
442
- 40i,11n,49p,949b,440s stack positions out of 5000i,500n,10000p,200000b,80000s
443
- {c:/texlive/2019/texmf-dist/fonts/enc/dvips/base/8r.enc}<c:/texlive/2019/texm
444
- f-dist/fonts/type1/public/amsfonts/cm/cmex10.pfb><c:/texlive/2019/texmf-dist/fo
445
- nts/type1/public/amsfonts/cm/cmmi10.pfb><c:/texlive/2019/texmf-dist/fonts/type1
446
- /public/amsfonts/cm/cmmi7.pfb><c:/texlive/2019/texmf-dist/fonts/type1/public/am
447
- sfonts/cm/cmr10.pfb><c:/texlive/2019/texmf-dist/fonts/type1/public/amsfonts/cm/
448
- cmr7.pfb><c:/texlive/2019/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy10.pfb>
449
- <c:/texlive/2019/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy5.pfb><c:/texliv
450
- e/2019/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.pfb><c:/texlive/2019/tex
451
- mf-dist/fonts/type1/urw/courier/ucrr8a.pfb><c:/texlive/2019/texmf-dist/fonts/ty
452
- pe1/urw/times/utmb8a.pfb><c:/texlive/2019/texmf-dist/fonts/type1/urw/times/utmr
453
- 8a.pfb><c:/texlive/2019/texmf-dist/fonts/type1/urw/times/utmri8a.pfb>
454
- Output written on main.pdf (4 pages, 135265 bytes).
455
- PDF statistics:
456
- 247 PDF objects out of 1000 (max. 8388607)
457
- 226 compressed objects within 3 object streams
458
- 37 named destinations out of 1000 (max. 500000)
459
- 57 words of extra memory for PDF output out of 10000 (max. 10000000)
460
-
 
outputs/outputs_20230421_000752/main.out DELETED
@@ -1,7 +0,0 @@
1
- \BOOKMARK [1][-]{section.1}{introduction}{}% 1
2
- \BOOKMARK [1][-]{section.2}{related works}{}% 2
3
- \BOOKMARK [1][-]{section.3}{backgrounds}{}% 3
4
- \BOOKMARK [2][-]{subsection.3.1}{Problem Statement and Foundational Concepts}{section.3}% 4
5
- \BOOKMARK [2][-]{subsection.3.2}{Q-Learning and Related Algorithms}{section.3}% 5
6
- \BOOKMARK [2][-]{subsection.3.3}{Policy Gradient Methods}{section.3}% 6
7
- \BOOKMARK [2][-]{subsection.3.4}{Methodology and Evaluation Metrics}{section.3}% 7
 
outputs/outputs_20230421_000752/main.pdf DELETED
Binary file (135 kB)
 
outputs/outputs_20230421_000752/main.synctex.gz DELETED
Binary file (46 kB)
 
outputs/outputs_20230421_000752/main.tex DELETED
@@ -1,34 +0,0 @@
1
- \documentclass{article} % For LaTeX2e
2
- \UseRawInputEncoding
3
- \usepackage{graphicx}
4
- \usepackage{booktabs}
5
- \usepackage{iclr2022_conference, times}
6
- \input{math_commands.tex}
7
- \usepackage{hyperref}
8
- \usepackage{url}
9
- \usepackage{algorithmicx}
10
-
11
- \title{A Survey on Reinforcement Learning}
12
- \author{GPT-4}
13
-
14
- \newcommand{\fix}{\marginpar{FIX}}
15
- \newcommand{\new}{\marginpar{NEW}}
16
-
17
- \begin{document}
18
- \maketitle
19
- \input{abstract.tex}
20
- \input{introduction.tex}
21
- \input{related works.tex}
22
- \input{backgrounds.tex}
23
- \input{methodology.tex}
24
- \input{experiments.tex}
25
- \input{conclusion.tex}
26
-
27
- \bibliography{ref}
28
- \bibliographystyle{iclr2022_conference}
29
-
30
- %\appendix
31
- %\section{Appendix}
32
- %You may include other additional sections here.
33
-
34
- \end{document}
 
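For context, the deleted main.tex above is a thin wrapper: it assembles the generated survey from per-section files via \input and resolves citations through BibTeX with \bibliographystyle{iclr2022_conference} (and, assuming the conference style loads natbib as the bundled natbib.sty suggests, the \citet/\citep commands). A minimal sketch of one such section file, with hypothetical text and a hypothetical citation key:

% introduction.tex -- pulled in by \input{introduction.tex} in main.tex
\section{Introduction}
Reinforcement learning studies agents that learn to act by maximizing
cumulative reward; see \citet{sutton2018reinforcement} for a standard
treatment.
% hypothetical BibTeX key above, expected to live in ref.bib
% Typical build sequence that would regenerate the main.bbl and main.pdf
% recorded in the log above:
%   pdflatex main && bibtex main && pdflatex main && pdflatex main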
 
outputs/outputs_20230421_000752/math_commands.tex DELETED
@@ -1,508 +0,0 @@
1
- %%%%% NEW MATH DEFINITIONS %%%%%
2
-
3
- \usepackage{amsmath,amsfonts,bm}
4
-
5
- % Mark sections of captions for referring to divisions of figures
6
- \newcommand{\figleft}{{\em (Left)}}
7
- \newcommand{\figcenter}{{\em (Center)}}
8
- \newcommand{\figright}{{\em (Right)}}
9
- \newcommand{\figtop}{{\em (Top)}}
10
- \newcommand{\figbottom}{{\em (Bottom)}}
11
- \newcommand{\captiona}{{\em (a)}}
12
- \newcommand{\captionb}{{\em (b)}}
13
- \newcommand{\captionc}{{\em (c)}}
14
- \newcommand{\captiond}{{\em (d)}}
15
-
16
- % Highlight a newly defined term
17
- \newcommand{\newterm}[1]{{\bf #1}}
18
-
19
-
20
- % Figure reference, lower-case.
21
- \def\figref#1{figure~\ref{#1}}
22
- % Figure reference, capital. For start of sentence
23
- \def\Figref#1{Figure~\ref{#1}}
24
- \def\twofigref#1#2{figures \ref{#1} and \ref{#2}}
25
- \def\quadfigref#1#2#3#4{figures \ref{#1}, \ref{#2}, \ref{#3} and \ref{#4}}
26
- % Section reference, lower-case.
27
- \def\secref#1{section~\ref{#1}}
28
- % Section reference, capital.
29
- \def\Secref#1{Section~\ref{#1}}
30
- % Reference to two sections.
31
- \def\twosecrefs#1#2{sections \ref{#1} and \ref{#2}}
32
- % Reference to three sections.
33
- \def\secrefs#1#2#3{sections \ref{#1}, \ref{#2} and \ref{#3}}
34
- % Reference to an equation, lower-case.
35
- \def\eqref#1{equation~\ref{#1}}
36
- % Reference to an equation, upper case
37
- \def\Eqref#1{Equation~\ref{#1}}
38
- % A raw reference to an equation---avoid using if possible
39
- \def\plaineqref#1{\ref{#1}}
40
- % Reference to a chapter, lower-case.
41
- \def\chapref#1{chapter~\ref{#1}}
42
- % Reference to an equation, upper case.
43
- \def\Chapref#1{Chapter~\ref{#1}}
44
- % Reference to a range of chapters
45
- \def\rangechapref#1#2{chapters\ref{#1}--\ref{#2}}
46
- % Reference to an algorithm, lower-case.
47
- \def\algref#1{algorithm~\ref{#1}}
48
- % Reference to an algorithm, upper case.
49
- \def\Algref#1{Algorithm~\ref{#1}}
50
- \def\twoalgref#1#2{algorithms \ref{#1} and \ref{#2}}
51
- \def\Twoalgref#1#2{Algorithms \ref{#1} and \ref{#2}}
52
- % Reference to a part, lower case
53
- \def\partref#1{part~\ref{#1}}
54
- % Reference to a part, upper case
55
- \def\Partref#1{Part~\ref{#1}}
56
- \def\twopartref#1#2{parts \ref{#1} and \ref{#2}}
57
-
58
- \def\ceil#1{\lceil #1 \rceil}
59
- \def\floor#1{\lfloor #1 \rfloor}
60
- \def\1{\bm{1}}
61
- \newcommand{\train}{\mathcal{D}}
62
- \newcommand{\valid}{\mathcal{D_{\mathrm{valid}}}}
63
- \newcommand{\test}{\mathcal{D_{\mathrm{test}}}}
64
-
65
- \def\eps{{\epsilon}}
66
-
67
-
68
- % Random variables
69
- \def\reta{{\textnormal{$\eta$}}}
70
- \def\ra{{\textnormal{a}}}
71
- \def\rb{{\textnormal{b}}}
72
- \def\rc{{\textnormal{c}}}
73
- \def\rd{{\textnormal{d}}}
74
- \def\re{{\textnormal{e}}}
75
- \def\rf{{\textnormal{f}}}
76
- \def\rg{{\textnormal{g}}}
77
- \def\rh{{\textnormal{h}}}
78
- \def\ri{{\textnormal{i}}}
79
- \def\rj{{\textnormal{j}}}
80
- \def\rk{{\textnormal{k}}}
81
- \def\rl{{\textnormal{l}}}
82
- % rm is already a command, just don't name any random variables m
83
- \def\rn{{\textnormal{n}}}
84
- \def\ro{{\textnormal{o}}}
85
- \def\rp{{\textnormal{p}}}
86
- \def\rq{{\textnormal{q}}}
87
- \def\rr{{\textnormal{r}}}
88
- \def\rs{{\textnormal{s}}}
89
- \def\rt{{\textnormal{t}}}
90
- \def\ru{{\textnormal{u}}}
91
- \def\rv{{\textnormal{v}}}
92
- \def\rw{{\textnormal{w}}}
93
- \def\rx{{\textnormal{x}}}
94
- \def\ry{{\textnormal{y}}}
95
- \def\rz{{\textnormal{z}}}
96
-
97
- % Random vectors
98
- \def\rvepsilon{{\mathbf{\epsilon}}}
99
- \def\rvtheta{{\mathbf{\theta}}}
100
- \def\rva{{\mathbf{a}}}
101
- \def\rvb{{\mathbf{b}}}
102
- \def\rvc{{\mathbf{c}}}
103
- \def\rvd{{\mathbf{d}}}
104
- \def\rve{{\mathbf{e}}}
105
- \def\rvf{{\mathbf{f}}}
106
- \def\rvg{{\mathbf{g}}}
107
- \def\rvh{{\mathbf{h}}}
108
- \def\rvu{{\mathbf{i}}}
109
- \def\rvj{{\mathbf{j}}}
110
- \def\rvk{{\mathbf{k}}}
111
- \def\rvl{{\mathbf{l}}}
112
- \def\rvm{{\mathbf{m}}}
113
- \def\rvn{{\mathbf{n}}}
114
- \def\rvo{{\mathbf{o}}}
115
- \def\rvp{{\mathbf{p}}}
116
- \def\rvq{{\mathbf{q}}}
117
- \def\rvr{{\mathbf{r}}}
118
- \def\rvs{{\mathbf{s}}}
119
- \def\rvt{{\mathbf{t}}}
120
- \def\rvu{{\mathbf{u}}}
121
- \def\rvv{{\mathbf{v}}}
122
- \def\rvw{{\mathbf{w}}}
123
- \def\rvx{{\mathbf{x}}}
124
- \def\rvy{{\mathbf{y}}}
125
- \def\rvz{{\mathbf{z}}}
126
-
127
- % Elements of random vectors
128
- \def\erva{{\textnormal{a}}}
129
- \def\ervb{{\textnormal{b}}}
130
- \def\ervc{{\textnormal{c}}}
131
- \def\ervd{{\textnormal{d}}}
132
- \def\erve{{\textnormal{e}}}
133
- \def\ervf{{\textnormal{f}}}
134
- \def\ervg{{\textnormal{g}}}
135
- \def\ervh{{\textnormal{h}}}
136
- \def\ervi{{\textnormal{i}}}
137
- \def\ervj{{\textnormal{j}}}
138
- \def\ervk{{\textnormal{k}}}
139
- \def\ervl{{\textnormal{l}}}
140
- \def\ervm{{\textnormal{m}}}
141
- \def\ervn{{\textnormal{n}}}
142
- \def\ervo{{\textnormal{o}}}
143
- \def\ervp{{\textnormal{p}}}
144
- \def\ervq{{\textnormal{q}}}
145
- \def\ervr{{\textnormal{r}}}
146
- \def\ervs{{\textnormal{s}}}
147
- \def\ervt{{\textnormal{t}}}
148
- \def\ervu{{\textnormal{u}}}
149
- \def\ervv{{\textnormal{v}}}
150
- \def\ervw{{\textnormal{w}}}
151
- \def\ervx{{\textnormal{x}}}
152
- \def\ervy{{\textnormal{y}}}
153
- \def\ervz{{\textnormal{z}}}
154
-
155
- % Random matrices
156
- \def\rmA{{\mathbf{A}}}
157
- \def\rmB{{\mathbf{B}}}
158
- \def\rmC{{\mathbf{C}}}
159
- \def\rmD{{\mathbf{D}}}
160
- \def\rmE{{\mathbf{E}}}
161
- \def\rmF{{\mathbf{F}}}
162
- \def\rmG{{\mathbf{G}}}
163
- \def\rmH{{\mathbf{H}}}
164
- \def\rmI{{\mathbf{I}}}
165
- \def\rmJ{{\mathbf{J}}}
166
- \def\rmK{{\mathbf{K}}}
167
- \def\rmL{{\mathbf{L}}}
168
- \def\rmM{{\mathbf{M}}}
169
- \def\rmN{{\mathbf{N}}}
170
- \def\rmO{{\mathbf{O}}}
171
- \def\rmP{{\mathbf{P}}}
172
- \def\rmQ{{\mathbf{Q}}}
173
- \def\rmR{{\mathbf{R}}}
174
- \def\rmS{{\mathbf{S}}}
175
- \def\rmT{{\mathbf{T}}}
176
- \def\rmU{{\mathbf{U}}}
177
- \def\rmV{{\mathbf{V}}}
178
- \def\rmW{{\mathbf{W}}}
179
- \def\rmX{{\mathbf{X}}}
180
- \def\rmY{{\mathbf{Y}}}
181
- \def\rmZ{{\mathbf{Z}}}
182
-
183
- % Elements of random matrices
184
- \def\ermA{{\textnormal{A}}}
185
- \def\ermB{{\textnormal{B}}}
186
- \def\ermC{{\textnormal{C}}}
187
- \def\ermD{{\textnormal{D}}}
188
- \def\ermE{{\textnormal{E}}}
189
- \def\ermF{{\textnormal{F}}}
190
- \def\ermG{{\textnormal{G}}}
191
- \def\ermH{{\textnormal{H}}}
192
- \def\ermI{{\textnormal{I}}}
193
- \def\ermJ{{\textnormal{J}}}
194
- \def\ermK{{\textnormal{K}}}
195
- \def\ermL{{\textnormal{L}}}
196
- \def\ermM{{\textnormal{M}}}
197
- \def\ermN{{\textnormal{N}}}
198
- \def\ermO{{\textnormal{O}}}
199
- \def\ermP{{\textnormal{P}}}
200
- \def\ermQ{{\textnormal{Q}}}
201
- \def\ermR{{\textnormal{R}}}
202
- \def\ermS{{\textnormal{S}}}
203
- \def\ermT{{\textnormal{T}}}
204
- \def\ermU{{\textnormal{U}}}
205
- \def\ermV{{\textnormal{V}}}
206
- \def\ermW{{\textnormal{W}}}
207
- \def\ermX{{\textnormal{X}}}
208
- \def\ermY{{\textnormal{Y}}}
209
- \def\ermZ{{\textnormal{Z}}}
210
-
211
- % Vectors
212
- \def\vzero{{\bm{0}}}
213
- \def\vone{{\bm{1}}}
214
- \def\vmu{{\bm{\mu}}}
215
- \def\vtheta{{\bm{\theta}}}
216
- \def\va{{\bm{a}}}
217
- \def\vb{{\bm{b}}}
218
- \def\vc{{\bm{c}}}
219
- \def\vd{{\bm{d}}}
220
- \def\ve{{\bm{e}}}
221
- \def\vf{{\bm{f}}}
222
- \def\vg{{\bm{g}}}
223
- \def\vh{{\bm{h}}}
224
- \def\vi{{\bm{i}}}
225
- \def\vj{{\bm{j}}}
226
- \def\vk{{\bm{k}}}
227
- \def\vl{{\bm{l}}}
228
- \def\vm{{\bm{m}}}
229
- \def\vn{{\bm{n}}}
230
- \def\vo{{\bm{o}}}
231
- \def\vp{{\bm{p}}}
232
- \def\vq{{\bm{q}}}
233
- \def\vr{{\bm{r}}}
234
- \def\vs{{\bm{s}}}
235
- \def\vt{{\bm{t}}}
236
- \def\vu{{\bm{u}}}
237
- \def\vv{{\bm{v}}}
238
- \def\vw{{\bm{w}}}
239
- \def\vx{{\bm{x}}}
240
- \def\vy{{\bm{y}}}
241
- \def\vz{{\bm{z}}}
242
-
243
- % Elements of vectors
244
- \def\evalpha{{\alpha}}
245
- \def\evbeta{{\beta}}
246
- \def\evepsilon{{\epsilon}}
247
- \def\evlambda{{\lambda}}
248
- \def\evomega{{\omega}}
249
- \def\evmu{{\mu}}
250
- \def\evpsi{{\psi}}
251
- \def\evsigma{{\sigma}}
252
- \def\evtheta{{\theta}}
253
- \def\eva{{a}}
254
- \def\evb{{b}}
255
- \def\evc{{c}}
256
- \def\evd{{d}}
257
- \def\eve{{e}}
258
- \def\evf{{f}}
259
- \def\evg{{g}}
260
- \def\evh{{h}}
261
- \def\evi{{i}}
262
- \def\evj{{j}}
263
- \def\evk{{k}}
264
- \def\evl{{l}}
265
- \def\evm{{m}}
266
- \def\evn{{n}}
267
- \def\evo{{o}}
268
- \def\evp{{p}}
269
- \def\evq{{q}}
270
- \def\evr{{r}}
271
- \def\evs{{s}}
272
- \def\evt{{t}}
273
- \def\evu{{u}}
274
- \def\evv{{v}}
275
- \def\evw{{w}}
276
- \def\evx{{x}}
277
- \def\evy{{y}}
278
- \def\evz{{z}}
279
-
280
- % Matrix
281
- \def\mA{{\bm{A}}}
282
- \def\mB{{\bm{B}}}
283
- \def\mC{{\bm{C}}}
284
- \def\mD{{\bm{D}}}
285
- \def\mE{{\bm{E}}}
286
- \def\mF{{\bm{F}}}
287
- \def\mG{{\bm{G}}}
288
- \def\mH{{\bm{H}}}
289
- \def\mI{{\bm{I}}}
290
- \def\mJ{{\bm{J}}}
291
- \def\mK{{\bm{K}}}
292
- \def\mL{{\bm{L}}}
293
- \def\mM{{\bm{M}}}
294
- \def\mN{{\bm{N}}}
295
- \def\mO{{\bm{O}}}
296
- \def\mP{{\bm{P}}}
297
- \def\mQ{{\bm{Q}}}
298
- \def\mR{{\bm{R}}}
299
- \def\mS{{\bm{S}}}
300
- \def\mT{{\bm{T}}}
301
- \def\mU{{\bm{U}}}
302
- \def\mV{{\bm{V}}}
303
- \def\mW{{\bm{W}}}
304
- \def\mX{{\bm{X}}}
305
- \def\mY{{\bm{Y}}}
306
- \def\mZ{{\bm{Z}}}
307
- \def\mBeta{{\bm{\beta}}}
308
- \def\mPhi{{\bm{\Phi}}}
309
- \def\mLambda{{\bm{\Lambda}}}
310
- \def\mSigma{{\bm{\Sigma}}}
311
-
312
- % Tensor
313
- \DeclareMathAlphabet{\mathsfit}{\encodingdefault}{\sfdefault}{m}{sl}
314
- \SetMathAlphabet{\mathsfit}{bold}{\encodingdefault}{\sfdefault}{bx}{n}
315
- \newcommand{\tens}[1]{\bm{\mathsfit{#1}}}
316
- \def\tA{{\tens{A}}}
317
- \def\tB{{\tens{B}}}
318
- \def\tC{{\tens{C}}}
319
- \def\tD{{\tens{D}}}
320
- \def\tE{{\tens{E}}}
321
- \def\tF{{\tens{F}}}
322
- \def\tG{{\tens{G}}}
323
- \def\tH{{\tens{H}}}
324
- \def\tI{{\tens{I}}}
325
- \def\tJ{{\tens{J}}}
326
- \def\tK{{\tens{K}}}
327
- \def\tL{{\tens{L}}}
328
- \def\tM{{\tens{M}}}
329
- \def\tN{{\tens{N}}}
330
- \def\tO{{\tens{O}}}
331
- \def\tP{{\tens{P}}}
332
- \def\tQ{{\tens{Q}}}
333
- \def\tR{{\tens{R}}}
334
- \def\tS{{\tens{S}}}
335
- \def\tT{{\tens{T}}}
336
- \def\tU{{\tens{U}}}
337
- \def\tV{{\tens{V}}}
338
- \def\tW{{\tens{W}}}
339
- \def\tX{{\tens{X}}}
340
- \def\tY{{\tens{Y}}}
341
- \def\tZ{{\tens{Z}}}
342
-
343
-
344
- % Graph
345
- \def\gA{{\mathcal{A}}}
346
- \def\gB{{\mathcal{B}}}
347
- \def\gC{{\mathcal{C}}}
348
- \def\gD{{\mathcal{D}}}
349
- \def\gE{{\mathcal{E}}}
350
- \def\gF{{\mathcal{F}}}
351
- \def\gG{{\mathcal{G}}}
352
- \def\gH{{\mathcal{H}}}
353
- \def\gI{{\mathcal{I}}}
354
- \def\gJ{{\mathcal{J}}}
355
- \def\gK{{\mathcal{K}}}
356
- \def\gL{{\mathcal{L}}}
357
- \def\gM{{\mathcal{M}}}
358
- \def\gN{{\mathcal{N}}}
359
- \def\gO{{\mathcal{O}}}
360
- \def\gP{{\mathcal{P}}}
361
- \def\gQ{{\mathcal{Q}}}
362
- \def\gR{{\mathcal{R}}}
363
- \def\gS{{\mathcal{S}}}
364
- \def\gT{{\mathcal{T}}}
365
- \def\gU{{\mathcal{U}}}
366
- \def\gV{{\mathcal{V}}}
367
- \def\gW{{\mathcal{W}}}
368
- \def\gX{{\mathcal{X}}}
369
- \def\gY{{\mathcal{Y}}}
370
- \def\gZ{{\mathcal{Z}}}
371
-
372
- % Sets
373
- \def\sA{{\mathbb{A}}}
374
- \def\sB{{\mathbb{B}}}
375
- \def\sC{{\mathbb{C}}}
376
- \def\sD{{\mathbb{D}}}
377
- % Don't use a set called E, because this would be the same as our symbol
378
- % for expectation.
379
- \def\sF{{\mathbb{F}}}
380
- \def\sG{{\mathbb{G}}}
381
- \def\sH{{\mathbb{H}}}
382
- \def\sI{{\mathbb{I}}}
383
- \def\sJ{{\mathbb{J}}}
384
- \def\sK{{\mathbb{K}}}
385
- \def\sL{{\mathbb{L}}}
386
- \def\sM{{\mathbb{M}}}
387
- \def\sN{{\mathbb{N}}}
388
- \def\sO{{\mathbb{O}}}
389
- \def\sP{{\mathbb{P}}}
390
- \def\sQ{{\mathbb{Q}}}
391
- \def\sR{{\mathbb{R}}}
392
- \def\sS{{\mathbb{S}}}
393
- \def\sT{{\mathbb{T}}}
394
- \def\sU{{\mathbb{U}}}
395
- \def\sV{{\mathbb{V}}}
396
- \def\sW{{\mathbb{W}}}
397
- \def\sX{{\mathbb{X}}}
398
- \def\sY{{\mathbb{Y}}}
399
- \def\sZ{{\mathbb{Z}}}
400
-
401
- % Entries of a matrix
402
- \def\emLambda{{\Lambda}}
403
- \def\emA{{A}}
404
- \def\emB{{B}}
405
- \def\emC{{C}}
406
- \def\emD{{D}}
407
- \def\emE{{E}}
408
- \def\emF{{F}}
409
- \def\emG{{G}}
410
- \def\emH{{H}}
411
- \def\emI{{I}}
412
- \def\emJ{{J}}
413
- \def\emK{{K}}
414
- \def\emL{{L}}
415
- \def\emM{{M}}
416
- \def\emN{{N}}
417
- \def\emO{{O}}
418
- \def\emP{{P}}
419
- \def\emQ{{Q}}
420
- \def\emR{{R}}
421
- \def\emS{{S}}
422
- \def\emT{{T}}
423
- \def\emU{{U}}
424
- \def\emV{{V}}
425
- \def\emW{{W}}
426
- \def\emX{{X}}
427
- \def\emY{{Y}}
428
- \def\emZ{{Z}}
429
- \def\emSigma{{\Sigma}}
430
-
431
- % entries of a tensor
432
- % Same font as tensor, without \bm wrapper
433
- \newcommand{\etens}[1]{\mathsfit{#1}}
434
- \def\etLambda{{\etens{\Lambda}}}
435
- \def\etA{{\etens{A}}}
436
- \def\etB{{\etens{B}}}
437
- \def\etC{{\etens{C}}}
438
- \def\etD{{\etens{D}}}
439
- \def\etE{{\etens{E}}}
440
- \def\etF{{\etens{F}}}
441
- \def\etG{{\etens{G}}}
442
- \def\etH{{\etens{H}}}
443
- \def\etI{{\etens{I}}}
444
- \def\etJ{{\etens{J}}}
445
- \def\etK{{\etens{K}}}
446
- \def\etL{{\etens{L}}}
447
- \def\etM{{\etens{M}}}
448
- \def\etN{{\etens{N}}}
449
- \def\etO{{\etens{O}}}
450
- \def\etP{{\etens{P}}}
451
- \def\etQ{{\etens{Q}}}
452
- \def\etR{{\etens{R}}}
453
- \def\etS{{\etens{S}}}
454
- \def\etT{{\etens{T}}}
455
- \def\etU{{\etens{U}}}
456
- \def\etV{{\etens{V}}}
457
- \def\etW{{\etens{W}}}
458
- \def\etX{{\etens{X}}}
459
- \def\etY{{\etens{Y}}}
460
- \def\etZ{{\etens{Z}}}
461
-
462
- % The true underlying data generating distribution
463
- \newcommand{\pdata}{p_{\rm{data}}}
464
- % The empirical distribution defined by the training set
465
- \newcommand{\ptrain}{\hat{p}_{\rm{data}}}
466
- \newcommand{\Ptrain}{\hat{P}_{\rm{data}}}
467
- % The model distribution
468
- \newcommand{\pmodel}{p_{\rm{model}}}
469
- \newcommand{\Pmodel}{P_{\rm{model}}}
470
- \newcommand{\ptildemodel}{\tilde{p}_{\rm{model}}}
471
- % Stochastic autoencoder distributions
472
- \newcommand{\pencode}{p_{\rm{encoder}}}
473
- \newcommand{\pdecode}{p_{\rm{decoder}}}
474
- \newcommand{\precons}{p_{\rm{reconstruct}}}
475
-
476
- \newcommand{\laplace}{\mathrm{Laplace}} % Laplace distribution
477
-
478
- \newcommand{\E}{\mathbb{E}}
479
- \newcommand{\Ls}{\mathcal{L}}
480
- \newcommand{\R}{\mathbb{R}}
481
- \newcommand{\emp}{\tilde{p}}
482
- \newcommand{\lr}{\alpha}
483
- \newcommand{\reg}{\lambda}
484
- \newcommand{\rect}{\mathrm{rectifier}}
485
- \newcommand{\softmax}{\mathrm{softmax}}
486
- \newcommand{\sigmoid}{\sigma}
487
- \newcommand{\softplus}{\zeta}
488
- \newcommand{\KL}{D_{\mathrm{KL}}}
489
- \newcommand{\Var}{\mathrm{Var}}
490
- \newcommand{\standarderror}{\mathrm{SE}}
491
- \newcommand{\Cov}{\mathrm{Cov}}
492
- % Wolfram Mathworld says $L^2$ is for function spaces and $\ell^2$ is for vectors
493
- % But then they seem to use $L^2$ for vectors throughout the site, and so does
494
- % wikipedia.
495
- \newcommand{\normlzero}{L^0}
496
- \newcommand{\normlone}{L^1}
497
- \newcommand{\normltwo}{L^2}
498
- \newcommand{\normlp}{L^p}
499
- \newcommand{\normmax}{L^\infty}
500
-
501
- \newcommand{\parents}{Pa} % See usage in notation.tex. Chosen to match Daphne's book.
502
-
503
- \DeclareMathOperator*{\argmax}{arg\,max}
504
- \DeclareMathOperator*{\argmin}{arg\,min}
505
-
506
- \DeclareMathOperator{\sign}{sign}
507
- \DeclareMathOperator{\Tr}{Tr}
508
- \let\ab\allowbreak
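
The macro file above defines the notation shorthands used by the template (bold vectors such as \vx, matrices such as \mA, sets such as \R, and operators such as \E and \argmin). A minimal usage sketch, assuming math_commands.tex sits next to the document being compiled:

\documentclass{article}
\input{math_commands.tex}  % loads amsmath/amsfonts/bm and defines \vx, \vtheta, \ry, \E, \R, \reg, \argmin, ...
\begin{document}
Given data $\vx \in \R^d$, ridge regression selects
$\hat{\vtheta} = \argmin_{\vtheta} \E\big[(\ry - \vtheta^\top \vx)^2\big] + \reg \, \|\vtheta\|_2^2$.
\end{document}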
 
outputs/outputs_20230421_000752/methodology.tex DELETED
File without changes
outputs/outputs_20230421_000752/natbib.sty DELETED
@@ -1,1246 +0,0 @@
1
- %%
2
- %% This is file `natbib.sty',
3
- %% generated with the docstrip utility.
4
- %%
5
- %% The original source files were:
6
- %%
7
- %% natbib.dtx (with options: `package,all')
8
- %% =============================================
9
- %% IMPORTANT NOTICE:
10
- %%
11
- %% This program can be redistributed and/or modified under the terms
12
- %% of the LaTeX Project Public License Distributed from CTAN
13
- %% archives in directory macros/latex/base/lppl.txt; either
14
- %% version 1 of the License, or any later version.
15
- %%
16
- %% This is a generated file.
17
- %% It may not be distributed without the original source file natbib.dtx.
18
- %%
19
- %% Full documentation can be obtained by LaTeXing that original file.
20
- %% Only a few abbreviated comments remain here to describe the usage.
21
- %% =============================================
22
- %% Copyright 1993-2009 Patrick W Daly
23
- %% Max-Planck-Institut f\"ur Sonnensystemforschung
24
- %% Max-Planck-Str. 2
25
- %% D-37191 Katlenburg-Lindau
26
- %% Germany
27
- %% E-mail: daly@mps.mpg.de
28
- \NeedsTeXFormat{LaTeX2e}[1995/06/01]
29
- \ProvidesPackage{natbib}
30
- [2009/07/16 8.31 (PWD, AO)]
31
-
32
- % This package reimplements the LaTeX \cite command to be used for various
33
- % citation styles, both author-year and numerical. It accepts BibTeX
34
- % output intended for many other packages, and therefore acts as a
35
- % general, all-purpose citation-style interface.
36
- %
37
- % With standard numerical .bst files, only numerical citations are
38
- % possible. With an author-year .bst file, both numerical and
39
- % author-year citations are possible.
40
- %
41
- % If author-year citations are selected, \bibitem must have one of the
42
- % following forms:
43
- % \bibitem[Jones et al.(1990)]{key}...
44
- % \bibitem[Jones et al.(1990)Jones, Baker, and Williams]{key}...
45
- % \bibitem[Jones et al., 1990]{key}...
46
- % \bibitem[\protect\citeauthoryear{Jones, Baker, and Williams}{Jones
47
- % et al.}{1990}]{key}...
48
- % \bibitem[\protect\citeauthoryear{Jones et al.}{1990}]{key}...
49
- % \bibitem[\protect\astroncite{Jones et al.}{1990}]{key}...
50
- % \bibitem[\protect\citename{Jones et al., }1990]{key}...
51
- % \harvarditem[Jones et al.]{Jones, Baker, and Williams}{1990}{key}...
52
- %
53
- % This is either to be made up manually, or to be generated by an
54
- % appropriate .bst file with BibTeX.
55
- % Author-year mode || Numerical mode
56
- % Then, \citet{key} ==>> Jones et al. (1990) || Jones et al. [21]
57
- % \citep{key} ==>> (Jones et al., 1990) || [21]
58
- % Multiple citations as normal:
59
- % \citep{key1,key2} ==>> (Jones et al., 1990; Smith, 1989) || [21,24]
60
- % or (Jones et al., 1990, 1991) || [21,24]
61
- % or (Jones et al., 1990a,b) || [21,24]
62
- % \cite{key} is the equivalent of \citet{key} in author-year mode
63
- % and of \citep{key} in numerical mode
64
- % Full author lists may be forced with \citet* or \citep*, e.g.
65
- % \citep*{key} ==>> (Jones, Baker, and Williams, 1990)
66
- % Optional notes as:
67
- % \citep[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2)
68
- % \citep[e.g.,][]{key} ==>> (e.g., Jones et al., 1990)
69
- % \citep[see][pg. 34]{key}==>> (see Jones et al., 1990, pg. 34)
70
- % (Note: in standard LaTeX, only one note is allowed, after the ref.
71
- % Here, one note is like the standard, two make pre- and post-notes.)
72
- % \citealt{key} ==>> Jones et al. 1990
73
- % \citealt*{key} ==>> Jones, Baker, and Williams 1990
74
- % \citealp{key} ==>> Jones et al., 1990
75
- % \citealp*{key} ==>> Jones, Baker, and Williams, 1990
76
- % Additional citation possibilities (both author-year and numerical modes)
77
- % \citeauthor{key} ==>> Jones et al.
78
- % \citeauthor*{key} ==>> Jones, Baker, and Williams
79
- % \citeyear{key} ==>> 1990
80
- % \citeyearpar{key} ==>> (1990)
81
- % \citetext{priv. comm.} ==>> (priv. comm.)
82
- % \citenum{key} ==>> 11 [non-superscripted]
83
- % Note: full author lists depends on whether the bib style supports them;
84
- % if not, the abbreviated list is printed even when full requested.
85
- %
86
- % For names like della Robbia at the start of a sentence, use
87
- % \Citet{dRob98} ==>> Della Robbia (1998)
88
- % \Citep{dRob98} ==>> (Della Robbia, 1998)
89
- % \Citeauthor{dRob98} ==>> Della Robbia
90
- %
91
- %
92
- % Citation aliasing is achieved with
93
- % \defcitealias{key}{text}
94
- % \citetalias{key} ==>> text
95
- % \citepalias{key} ==>> (text)
96
- %
97
- % Defining the citation mode and punctual (citation style)
98
- % \setcitestyle{<comma-separated list of keywords, same
99
- % as the package options>}
100
- % Example: \setcitestyle{square,semicolon}
101
- % Alternatively:
102
- % Use \bibpunct with 6 mandatory arguments:
103
- % 1. opening bracket for citation
104
- % 2. closing bracket
105
- % 3. citation separator (for multiple citations in one \cite)
106
- % 4. the letter n for numerical styles, s for superscripts
107
- % else anything for author-year
108
- % 5. punctuation between authors and date
109
- % 6. punctuation between years (or numbers) when common authors missing
110
- % One optional argument is the character coming before post-notes. It
111
- % appears in square braces before all other arguments. May be left off.
112
- % Example (and default) \bibpunct[, ]{(}{)}{;}{a}{,}{,}
113
- %
114
- % To make this automatic for a given bib style, named newbib, say, make
115
- % a local configuration file, natbib.cfg, with the definition
116
- % \newcommand{\bibstyle@newbib}{\bibpunct...}
117
- % Then the \bibliographystyle{newbib} will cause \bibstyle@newbib to
118
- % be called on THE NEXT LATEX RUN (via the aux file).
119
- %
120
- % Such preprogrammed definitions may be invoked anywhere in the text
121
- % by calling \citestyle{newbib}. This is only useful if the style specified
122
- % differs from that in \bibliographystyle.
123
- %
124
- % With \citeindextrue and \citeindexfalse, one can control whether the
125
- % \cite commands make an automatic entry of the citation in the .idx
126
- % indexing file. For this, \makeindex must also be given in the preamble.
127
- %
128
- % Package Options: (for selecting punctuation)
129
- % round - round parentheses are used (default)
130
- % square - square brackets are used [option]
131
- % curly - curly braces are used {option}
132
- % angle - angle brackets are used <option>
133
- % semicolon - multiple citations separated by semi-colon (default)
134
- % colon - same as semicolon, an earlier confusion
135
- % comma - separated by comma
136
- % authoryear - selects author-year citations (default)
137
- % numbers- selects numerical citations
138
- % super - numerical citations as superscripts
139
- % sort - sorts multiple citations according to order in ref. list
140
- % sort&compress - like sort, but also compresses numerical citations
141
- % compress - compresses without sorting
142
- % longnamesfirst - makes first citation full author list
143
- % sectionbib - puts bibliography in a \section* instead of \chapter*
144
- % merge - allows the citation key to have a * prefix,
145
- % signifying to merge its reference with that of the previous citation.
146
- % elide - if references are merged, repeated portions of later ones may be removed.
147
- % mcite - recognizes and ignores the * prefix for merging.
148
- % Punctuation so selected dominates over any predefined ones.
149
- % Package options are called as, e.g.
150
- % \usepackage[square,comma]{natbib}
151
- % LaTeX the source file natbib.dtx to obtain more details
152
- % or the file natnotes.tex for a brief reference sheet.
153
- %-----------------------------------------------------------
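
The comment block above (verbatim from the bundled natbib.sty) documents the citation interface; a minimal author-year sketch, using hypothetical keys and a hypothetical refs.bib:

\documentclass{article}
\usepackage[round,authoryear]{natbib}
\begin{document}
% \citet{jones1990}               renders like: Jones et al. (1990)
% \citep[see][chap.~2]{smith1989} renders like: (see Smith, 1989, chap. 2)
As \citet{jones1990} argue, the effect appears robust \citep[see][chap.~2]{smith1989}.
\bibliographystyle{plainnat}
\bibliography{refs}  % hypothetical refs.bib containing jones1990 and smith1989
\end{document}
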
154
- \providecommand\@ifxundefined[1]{%
155
- \ifx#1\@undefined\expandafter\@firstoftwo\else\expandafter\@secondoftwo\fi
156
- }%
157
- \providecommand\@ifnum[1]{%
158
- \ifnum#1\expandafter\@firstoftwo\else\expandafter\@secondoftwo\fi
159
- }%
160
- \providecommand\@ifx[1]{%
161
- \ifx#1\expandafter\@firstoftwo\else\expandafter\@secondoftwo\fi
162
- }%
163
- \providecommand\appdef[2]{%
164
- \toks@\expandafter{#1}\@temptokena{#2}%
165
- \edef#1{\the\toks@\the\@temptokena}%
166
- }%
167
- \@ifclassloaded{agu2001}{\PackageError{natbib}
168
- {The agu2001 class already includes natbib coding,\MessageBreak
169
- so you should not add it explicitly}
170
- {Type <Return> for now, but then later remove\MessageBreak
171
- the command \protect\usepackage{natbib} from the document}
172
- \endinput}{}
173
- \@ifclassloaded{agutex}{\PackageError{natbib}
174
- {The AGUTeX class already includes natbib coding,\MessageBreak
175
- so you should not add it explicitly}
176
- {Type <Return> for now, but then later remove\MessageBreak
177
- the command \protect\usepackage{natbib} from the document}
178
- \endinput}{}
179
- \@ifclassloaded{aguplus}{\PackageError{natbib}
180
- {The aguplus class already includes natbib coding,\MessageBreak
181
- so you should not add it explicitly}
182
- {Type <Return> for now, but then later remove\MessageBreak
183
- the command \protect\usepackage{natbib} from the document}
184
- \endinput}{}
185
- \@ifclassloaded{nlinproc}{\PackageError{natbib}
186
- {The nlinproc class already includes natbib coding,\MessageBreak
187
- so you should not add it explicitly}
188
- {Type <Return> for now, but then later remove\MessageBreak
189
- the command \protect\usepackage{natbib} from the document}
190
- \endinput}{}
191
- \@ifclassloaded{egs}{\PackageError{natbib}
192
- {The egs class already includes natbib coding,\MessageBreak
193
- so you should not add it explicitly}
194
- {Type <Return> for now, but then later remove\MessageBreak
195
- the command \protect\usepackage{natbib} from the document}
196
- \endinput}{}
197
- \@ifclassloaded{egu}{\PackageError{natbib}
198
- {The egu class already includes natbib coding,\MessageBreak
199
- so you should not add it explicitly}
200
- {Type <Return> for now, but then later remove\MessageBreak
201
- the command \protect\usepackage{natbib} from the document}
202
- \endinput}{}
203
- % Define citation punctuation for some author-year styles
204
- % One may add and delete at this point
205
- % Or put additions into local configuration file natbib.cfg
206
- \newcommand\bibstyle@chicago{\bibpunct{(}{)}{;}{a}{,}{,}}
207
- \newcommand\bibstyle@named{\bibpunct{[}{]}{;}{a}{,}{,}}
208
- \newcommand\bibstyle@agu{\bibpunct{[}{]}{;}{a}{,}{,~}}%Amer. Geophys. Union
209
- \newcommand\bibstyle@copernicus{\bibpunct{(}{)}{;}{a}{,}{,}}%Copernicus Publications
210
- \let\bibstyle@egu=\bibstyle@copernicus
211
- \let\bibstyle@egs=\bibstyle@copernicus
212
- \newcommand\bibstyle@agsm{\bibpunct{(}{)}{,}{a}{}{,}\gdef\harvardand{\&}}
213
- \newcommand\bibstyle@kluwer{\bibpunct{(}{)}{,}{a}{}{,}\gdef\harvardand{\&}}
214
- \newcommand\bibstyle@dcu{\bibpunct{(}{)}{;}{a}{;}{,}\gdef\harvardand{and}}
215
- \newcommand\bibstyle@aa{\bibpunct{(}{)}{;}{a}{}{,}} %Astronomy & Astrophysics
216
- \newcommand\bibstyle@pass{\bibpunct{(}{)}{;}{a}{,}{,}}%Planet. & Space Sci
217
- \newcommand\bibstyle@anngeo{\bibpunct{(}{)}{;}{a}{,}{,}}%Annales Geophysicae
218
- \newcommand\bibstyle@nlinproc{\bibpunct{(}{)}{;}{a}{,}{,}}%Nonlin.Proc.Geophys.
219
- % Define citation punctuation for some numerical styles
220
- \newcommand\bibstyle@cospar{\bibpunct{/}{/}{,}{n}{}{}%
221
- \gdef\bibnumfmt##1{##1.}}
222
- \newcommand\bibstyle@esa{\bibpunct{(Ref.~}{)}{,}{n}{}{}%
223
- \gdef\bibnumfmt##1{##1.\hspace{1em}}}
224
- \newcommand\bibstyle@nature{\bibpunct{}{}{,}{s}{}{\textsuperscript{,}}%
225
- \gdef\bibnumfmt##1{##1.}}
226
- % The standard LaTeX styles
227
- \newcommand\bibstyle@plain{\bibpunct{[}{]}{,}{n}{}{,}}
228
- \let\bibstyle@alpha=\bibstyle@plain
229
- \let\bibstyle@abbrv=\bibstyle@plain
230
- \let\bibstyle@unsrt=\bibstyle@plain
231
- % The author-year modifications of the standard styles
232
- \newcommand\bibstyle@plainnat{\bibpunct{[}{]}{,}{a}{,}{,}}
233
- \let\bibstyle@abbrvnat=\bibstyle@plainnat
234
- \let\bibstyle@unsrtnat=\bibstyle@plainnat
235
- \newif\ifNAT@numbers \NAT@numbersfalse
236
- \newif\ifNAT@super \NAT@superfalse
237
- \let\NAT@merge\z@
238
- \DeclareOption{numbers}{\NAT@numberstrue
239
- \ExecuteOptions{square,comma,nobibstyle}}
240
- \DeclareOption{super}{\NAT@supertrue\NAT@numberstrue
241
- \renewcommand\NAT@open{}\renewcommand\NAT@close{}
242
- \ExecuteOptions{nobibstyle}}
243
- \DeclareOption{authoryear}{\NAT@numbersfalse
244
- \ExecuteOptions{round,semicolon,bibstyle}}
245
- \DeclareOption{round}{%
246
- \renewcommand\NAT@open{(} \renewcommand\NAT@close{)}
247
- \ExecuteOptions{nobibstyle}}
248
- \DeclareOption{square}{%
249
- \renewcommand\NAT@open{[} \renewcommand\NAT@close{]}
250
- \ExecuteOptions{nobibstyle}}
251
- \DeclareOption{angle}{%
252
- \renewcommand\NAT@open{$<$} \renewcommand\NAT@close{$>$}
253
- \ExecuteOptions{nobibstyle}}
254
- \DeclareOption{curly}{%
255
- \renewcommand\NAT@open{\{} \renewcommand\NAT@close{\}}
256
- \ExecuteOptions{nobibstyle}}
257
- \DeclareOption{comma}{\renewcommand\NAT@sep{,}
258
- \ExecuteOptions{nobibstyle}}
259
- \DeclareOption{semicolon}{\renewcommand\NAT@sep{;}
260
- \ExecuteOptions{nobibstyle}}
261
- \DeclareOption{colon}{\ExecuteOptions{semicolon}}
262
- \DeclareOption{nobibstyle}{\let\bibstyle=\@gobble}
263
- \DeclareOption{bibstyle}{\let\bibstyle=\@citestyle}
264
- \newif\ifNAT@openbib \NAT@openbibfalse
265
- \DeclareOption{openbib}{\NAT@openbibtrue}
266
- \DeclareOption{sectionbib}{\def\NAT@sectionbib{on}}
267
- \def\NAT@sort{\z@}
268
- \def\NAT@cmprs{\z@}
269
- \DeclareOption{sort}{\def\NAT@sort{\@ne}}
270
- \DeclareOption{compress}{\def\NAT@cmprs{\@ne}}
271
- \DeclareOption{sort&compress}{\def\NAT@sort{\@ne}\def\NAT@cmprs{\@ne}}
272
- \DeclareOption{mcite}{\let\NAT@merge\@ne}
273
- \DeclareOption{merge}{\@ifnum{\NAT@merge<\tw@}{\let\NAT@merge\tw@}{}}
274
- \DeclareOption{elide}{\@ifnum{\NAT@merge<\thr@@}{\let\NAT@merge\thr@@}{}}
275
- \@ifpackageloaded{cite}{\PackageWarningNoLine{natbib}
276
- {The `cite' package should not be used\MessageBreak
277
- with natbib. Use option `sort' instead}\ExecuteOptions{sort}}{}
278
- \@ifpackageloaded{mcite}{\PackageWarningNoLine{natbib}
279
- {The `mcite' package should not be used\MessageBreak
280
- with natbib. Use option `merge' instead}\ExecuteOptions{merge}}{}
281
- \@ifpackageloaded{citeref}{\PackageError{natbib}
282
- {The `citeref' package must be loaded after natbib}%
283
- {Move \protect\usepackage{citeref} to after \string\usepackage{natbib}}}{}
284
- \newif\ifNAT@longnames\NAT@longnamesfalse
285
- \DeclareOption{longnamesfirst}{\NAT@longnamestrue}
286
- \DeclareOption{nonamebreak}{\def\NAT@nmfmt#1{\mbox{\NAT@up#1}}}
287
- \def\NAT@nmfmt#1{{\NAT@up#1}}
288
- \renewcommand\bibstyle[1]{\csname bibstyle@#1\endcsname}
289
- \AtBeginDocument{\global\let\bibstyle=\@gobble}
290
- \let\@citestyle\bibstyle
291
- \newcommand\citestyle[1]{\@citestyle{#1}\let\bibstyle\@gobble}
292
- \newcommand\bibpunct[7][, ]%
293
- {\gdef\NAT@open{#2}\gdef\NAT@close{#3}\gdef
294
- \NAT@sep{#4}\global\NAT@numbersfalse
295
- \ifx #5n\global\NAT@numberstrue\global\NAT@superfalse
296
- \else
297
- \ifx #5s\global\NAT@numberstrue\global\NAT@supertrue
298
- \fi\fi
299
- \gdef\NAT@aysep{#6}\gdef\NAT@yrsep{#7}%
300
- \gdef\NAT@cmt{#1}%
301
- \NAT@@setcites
302
- }
303
- \newcommand\setcitestyle[1]{
304
- \@for\@tempa:=#1\do
305
- {\def\@tempb{round}\ifx\@tempa\@tempb
306
- \renewcommand\NAT@open{(}\renewcommand\NAT@close{)}\fi
307
- \def\@tempb{square}\ifx\@tempa\@tempb
308
- \renewcommand\NAT@open{[}\renewcommand\NAT@close{]}\fi
309
- \def\@tempb{angle}\ifx\@tempa\@tempb
310
- \renewcommand\NAT@open{$<$}\renewcommand\NAT@close{$>$}\fi
311
- \def\@tempb{curly}\ifx\@tempa\@tempb
312
- \renewcommand\NAT@open{\{}\renewcommand\NAT@close{\}}\fi
313
- \def\@tempb{semicolon}\ifx\@tempa\@tempb
314
- \renewcommand\NAT@sep{;}\fi
315
- \def\@tempb{colon}\ifx\@tempa\@tempb
316
- \renewcommand\NAT@sep{;}\fi
317
- \def\@tempb{comma}\ifx\@tempa\@tempb
318
- \renewcommand\NAT@sep{,}\fi
319
- \def\@tempb{authoryear}\ifx\@tempa\@tempb
320
- \NAT@numbersfalse\fi
321
- \def\@tempb{numbers}\ifx\@tempa\@tempb
322
- \NAT@numberstrue\NAT@superfalse\fi
323
- \def\@tempb{super}\ifx\@tempa\@tempb
324
- \NAT@numberstrue\NAT@supertrue\fi
325
- \expandafter\NAT@find@eq\@tempa=\relax\@nil
326
- \if\@tempc\relax\else
327
- \expandafter\NAT@rem@eq\@tempc
328
- \def\@tempb{open}\ifx\@tempa\@tempb
329
- \xdef\NAT@open{\@tempc}\fi
330
- \def\@tempb{close}\ifx\@tempa\@tempb
331
- \xdef\NAT@close{\@tempc}\fi
332
- \def\@tempb{aysep}\ifx\@tempa\@tempb
333
- \xdef\NAT@aysep{\@tempc}\fi
334
- \def\@tempb{yysep}\ifx\@tempa\@tempb
335
- \xdef\NAT@yrsep{\@tempc}\fi
336
- \def\@tempb{notesep}\ifx\@tempa\@tempb
337
- \xdef\NAT@cmt{\@tempc}\fi
338
- \def\@tempb{citesep}\ifx\@tempa\@tempb
339
- \xdef\NAT@sep{\@tempc}\fi
340
- \fi
341
- }%
342
- \NAT@@setcites
343
- }
344
- \def\NAT@find@eq#1=#2\@nil{\def\@tempa{#1}\def\@tempc{#2}}
345
- \def\NAT@rem@eq#1={\def\@tempc{#1}}
346
- \def\NAT@@setcites{\global\let\bibstyle\@gobble}
347
- \AtBeginDocument{\let\NAT@@setcites\NAT@set@cites}
348
- \newcommand\NAT@open{(} \newcommand\NAT@close{)}
349
- \newcommand\NAT@sep{;}
350
- \ProcessOptions
351
- \newcommand\NAT@aysep{,} \newcommand\NAT@yrsep{,}
352
- \newcommand\NAT@cmt{, }
353
- \newcommand\NAT@cite%
354
- [3]{\ifNAT@swa\NAT@@open\if*#2*\else#2\NAT@spacechar\fi
355
- #1\if*#3*\else\NAT@cmt#3\fi\NAT@@close\else#1\fi\endgroup}
356
- \newcommand\NAT@citenum%
357
- [3]{\ifNAT@swa\NAT@@open\if*#2*\else#2\NAT@spacechar\fi
358
- #1\if*#3*\else\NAT@cmt#3\fi\NAT@@close\else#1\fi\endgroup}
359
- \newcommand\NAT@citesuper[3]{\ifNAT@swa
360
- \if*#2*\else#2\NAT@spacechar\fi
361
- \unskip\kern\p@\textsuperscript{\NAT@@open#1\NAT@@close}%
362
- \if*#3*\else\NAT@spacechar#3\fi\else #1\fi\endgroup}
363
- \providecommand\textsuperscript[1]{\mbox{$^{\mbox{\scriptsize#1}}$}}
364
- \begingroup \catcode`\_=8
365
- \gdef\NAT@ifcat@num#1{%
366
- \ifcat_\ifnum\z@<0#1_\else A\fi
367
- \expandafter\@firstoftwo
368
- \else
369
- \expandafter\@secondoftwo
370
- \fi
371
- }%
372
- \endgroup
373
- \providecommand\@firstofone[1]{#1}
374
- \newcommand\NAT@citexnum{}
375
- \def\NAT@citexnum[#1][#2]#3{%
376
- \NAT@reset@parser
377
- \NAT@sort@cites{#3}%
378
- \NAT@reset@citea
379
- \@cite{\def\NAT@num{-1}\let\NAT@last@yr\relax\let\NAT@nm\@empty
380
- \@for\@citeb:=\NAT@cite@list\do
381
- {\@safe@activestrue
382
- \edef\@citeb{\expandafter\@firstofone\@citeb\@empty}%
383
- \@safe@activesfalse
384
- \@ifundefined{b@\@citeb\@extra@b@citeb}{%
385
- {\reset@font\bfseries?}
386
- \NAT@citeundefined\PackageWarning{natbib}%
387
- {Citation `\@citeb' on page \thepage \space undefined}}%
388
- {\let\NAT@last@num\NAT@num\let\NAT@last@nm\NAT@nm
389
- \NAT@parse{\@citeb}%
390
- \ifNAT@longnames\@ifundefined{bv@\@citeb\@extra@b@citeb}{%
391
- \let\NAT@name=\NAT@all@names
392
- \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}{}%
393
- \fi
394
- \ifNAT@full\let\NAT@nm\NAT@all@names\else
395
- \let\NAT@nm\NAT@name\fi
396
- \ifNAT@swa
397
- \@ifnum{\NAT@ctype>\@ne}{%
398
- \@citea
399
- \NAT@hyper@{\@ifnum{\NAT@ctype=\tw@}{\NAT@test{\NAT@ctype}}{\NAT@alias}}%
400
- }{%
401
- \@ifnum{\NAT@cmprs>\z@}{%
402
- \NAT@ifcat@num\NAT@num
403
- {\let\NAT@nm=\NAT@num}%
404
- {\def\NAT@nm{-2}}%
405
- \NAT@ifcat@num\NAT@last@num
406
- {\@tempcnta=\NAT@last@num\relax}%
407
- {\@tempcnta\m@ne}%
408
- \@ifnum{\NAT@nm=\@tempcnta}{%
409
- \@ifnum{\NAT@merge>\@ne}{}{\NAT@last@yr@mbox}%
410
- }{%
411
- \advance\@tempcnta by\@ne
412
- \@ifnum{\NAT@nm=\@tempcnta}{%
413
- \ifx\NAT@last@yr\relax
414
- \def@NAT@last@yr{\@citea}%
415
- \else
416
- \def@NAT@last@yr{--\NAT@penalty}%
417
- \fi
418
- }{%
419
- \NAT@last@yr@mbox
420
- }%
421
- }%
422
- }{%
423
- \@tempswatrue
424
- \@ifnum{\NAT@merge>\@ne}{\@ifnum{\NAT@last@num=\NAT@num\relax}{\@tempswafalse}{}}{}%
425
- \if@tempswa\NAT@citea@mbox\fi
426
- }%
427
- }%
428
- \NAT@def@citea
429
- \else
430
- \ifcase\NAT@ctype
431
- \ifx\NAT@last@nm\NAT@nm \NAT@yrsep\NAT@penalty\NAT@space\else
432
- \@citea \NAT@test{\@ne}\NAT@spacechar\NAT@mbox{\NAT@super@kern\NAT@@open}%
433
- \fi
434
- \if*#1*\else#1\NAT@spacechar\fi
435
- \NAT@mbox{\NAT@hyper@{{\citenumfont{\NAT@num}}}}%
436
- \NAT@def@citea@box
437
- \or
438
- \NAT@hyper@citea@space{\NAT@test{\NAT@ctype}}%
439
- \or
440
- \NAT@hyper@citea@space{\NAT@test{\NAT@ctype}}%
441
- \or
442
- \NAT@hyper@citea@space\NAT@alias
443
- \fi
444
- \fi
445
- }%
446
- }%
447
- \@ifnum{\NAT@cmprs>\z@}{\NAT@last@yr}{}%
448
- \ifNAT@swa\else
449
- \@ifnum{\NAT@ctype=\z@}{%
450
- \if*#2*\else\NAT@cmt#2\fi
451
- }{}%
452
- \NAT@mbox{\NAT@@close}%
453
- \fi
454
- }{#1}{#2}%
455
- }%
456
- \def\NAT@citea@mbox{%
457
- \@citea\mbox{\NAT@hyper@{{\citenumfont{\NAT@num}}}}%
458
- }%
459
- \def\NAT@hyper@#1{%
460
- \hyper@natlinkstart{\@citeb\@extra@b@citeb}#1\hyper@natlinkend
461
- }%
462
- \def\NAT@hyper@citea#1{%
463
- \@citea
464
- \NAT@hyper@{#1}%
465
- \NAT@def@citea
466
- }%
467
- \def\NAT@hyper@citea@space#1{%
468
- \@citea
469
- \NAT@hyper@{#1}%
470
- \NAT@def@citea@space
471
- }%
472
- \def\def@NAT@last@yr#1{%
473
- \protected@edef\NAT@last@yr{%
474
- #1%
475
- \noexpand\mbox{%
476
- \noexpand\hyper@natlinkstart{\@citeb\@extra@b@citeb}%
477
- {\noexpand\citenumfont{\NAT@num}}%
478
- \noexpand\hyper@natlinkend
479
- }%
480
- }%
481
- }%
482
- \def\NAT@last@yr@mbox{%
483
- \NAT@last@yr\let\NAT@last@yr\relax
484
- \NAT@citea@mbox
485
- }%
486
- \newcommand\NAT@test[1]{%
487
- \@ifnum{#1=\@ne}{%
488
- \ifx\NAT@nm\NAT@noname
489
- \begingroup\reset@font\bfseries(author?)\endgroup
490
- \PackageWarning{natbib}{%
491
- Author undefined for citation`\@citeb' \MessageBreak on page \thepage%
492
- }%
493
- \else \NAT@nm
494
- \fi
495
- }{%
496
- \if\relax\NAT@date\relax
497
- \begingroup\reset@font\bfseries(year?)\endgroup
498
- \PackageWarning{natbib}{%
499
- Year undefined for citation`\@citeb' \MessageBreak on page \thepage%
500
- }%
501
- \else \NAT@date
502
- \fi
503
- }%
504
- }%
505
- \let\citenumfont=\@empty
506
- \newcommand\NAT@citex{}
507
- \def\NAT@citex%
508
- [#1][#2]#3{%
509
- \NAT@reset@parser
510
- \NAT@sort@cites{#3}%
511
- \NAT@reset@citea
512
- \@cite{\let\NAT@nm\@empty\let\NAT@year\@empty
513
- \@for\@citeb:=\NAT@cite@list\do
514
- {\@safe@activestrue
515
- \edef\@citeb{\expandafter\@firstofone\@citeb\@empty}%
516
- \@safe@activesfalse
517
- \@ifundefined{b@\@citeb\@extra@b@citeb}{\@citea%
518
- {\reset@font\bfseries ?}\NAT@citeundefined
519
- \PackageWarning{natbib}%
520
- {Citation `\@citeb' on page \thepage \space undefined}\def\NAT@date{}}%
521
- {\let\NAT@last@nm=\NAT@nm\let\NAT@last@yr=\NAT@year
522
- \NAT@parse{\@citeb}%
523
- \ifNAT@longnames\@ifundefined{bv@\@citeb\@extra@b@citeb}{%
524
- \let\NAT@name=\NAT@all@names
525
- \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}{}%
526
- \fi
527
- \ifNAT@full\let\NAT@nm\NAT@all@names\else
528
- \let\NAT@nm\NAT@name\fi
529
- \ifNAT@swa\ifcase\NAT@ctype
530
- \if\relax\NAT@date\relax
531
- \@citea\NAT@hyper@{\NAT@nmfmt{\NAT@nm}\NAT@date}%
532
- \else
533
- \ifx\NAT@last@nm\NAT@nm\NAT@yrsep
534
- \ifx\NAT@last@yr\NAT@year
535
- \def\NAT@temp{{?}}%
536
- \ifx\NAT@temp\NAT@exlab\PackageWarningNoLine{natbib}%
537
- {Multiple citation on page \thepage: same authors and
538
- year\MessageBreak without distinguishing extra
539
- letter,\MessageBreak appears as question mark}\fi
540
- \NAT@hyper@{\NAT@exlab}%
541
- \else\unskip\NAT@spacechar
542
- \NAT@hyper@{\NAT@date}%
543
- \fi
544
- \else
545
- \@citea\NAT@hyper@{%
546
- \NAT@nmfmt{\NAT@nm}%
547
- \hyper@natlinkbreak{%
548
- \NAT@aysep\NAT@spacechar}{\@citeb\@extra@b@citeb
549
- }%
550
- \NAT@date
551
- }%
552
- \fi
553
- \fi
554
- \or\@citea\NAT@hyper@{\NAT@nmfmt{\NAT@nm}}%
555
- \or\@citea\NAT@hyper@{\NAT@date}%
556
- \or\@citea\NAT@hyper@{\NAT@alias}%
557
- \fi \NAT@def@citea
558
- \else
559
- \ifcase\NAT@ctype
560
- \if\relax\NAT@date\relax
561
- \@citea\NAT@hyper@{\NAT@nmfmt{\NAT@nm}}%
562
- \else
563
- \ifx\NAT@last@nm\NAT@nm\NAT@yrsep
564
- \ifx\NAT@last@yr\NAT@year
565
- \def\NAT@temp{{?}}%
566
- \ifx\NAT@temp\NAT@exlab\PackageWarningNoLine{natbib}%
567
- {Multiple citation on page \thepage: same authors and
568
- year\MessageBreak without distinguishing extra
569
- letter,\MessageBreak appears as question mark}\fi
570
- \NAT@hyper@{\NAT@exlab}%
571
- \else
572
- \unskip\NAT@spacechar
573
- \NAT@hyper@{\NAT@date}%
574
- \fi
575
- \else
576
- \@citea\NAT@hyper@{%
577
- \NAT@nmfmt{\NAT@nm}%
578
- \hyper@natlinkbreak{\NAT@spacechar\NAT@@open\if*#1*\else#1\NAT@spacechar\fi}%
579
- {\@citeb\@extra@b@citeb}%
580
- \NAT@date
581
- }%
582
- \fi
583
- \fi
584
- \or\@citea\NAT@hyper@{\NAT@nmfmt{\NAT@nm}}%
585
- \or\@citea\NAT@hyper@{\NAT@date}%
586
- \or\@citea\NAT@hyper@{\NAT@alias}%
587
- \fi
588
- \if\relax\NAT@date\relax
589
- \NAT@def@citea
590
- \else
591
- \NAT@def@citea@close
592
- \fi
593
- \fi
594
- }}\ifNAT@swa\else\if*#2*\else\NAT@cmt#2\fi
595
- \if\relax\NAT@date\relax\else\NAT@@close\fi\fi}{#1}{#2}}
596
- \def\NAT@spacechar{\ }%
597
- \def\NAT@separator{\NAT@sep\NAT@penalty}%
598
- \def\NAT@reset@citea{\c@NAT@ctr\@ne\let\@citea\@empty}%
599
- \def\NAT@def@citea{\def\@citea{\NAT@separator\NAT@space}}%
600
- \def\NAT@def@citea@space{\def\@citea{\NAT@separator\NAT@spacechar}}%
601
- \def\NAT@def@citea@close{\def\@citea{\NAT@@close\NAT@separator\NAT@space}}%
602
- \def\NAT@def@citea@box{\def\@citea{\NAT@mbox{\NAT@@close}\NAT@separator\NAT@spacechar}}%
603
- \newif\ifNAT@par \NAT@partrue
604
- \newcommand\NAT@@open{\ifNAT@par\NAT@open\fi}
605
- \newcommand\NAT@@close{\ifNAT@par\NAT@close\fi}
606
- \newcommand\NAT@alias{\@ifundefined{al@\@citeb\@extra@b@citeb}{%
607
- {\reset@font\bfseries(alias?)}\PackageWarning{natbib}
608
- {Alias undefined for citation `\@citeb'
609
- \MessageBreak on page \thepage}}{\@nameuse{al@\@citeb\@extra@b@citeb}}}
610
- \let\NAT@up\relax
611
- \newcommand\NAT@Up[1]{{\let\protect\@unexpandable@protect\let~\relax
612
- \expandafter\NAT@deftemp#1}\expandafter\NAT@UP\NAT@temp}
613
- \newcommand\NAT@deftemp[1]{\xdef\NAT@temp{#1}}
614
- \newcommand\NAT@UP[1]{\let\@tempa\NAT@UP\ifcat a#1\MakeUppercase{#1}%
615
- \let\@tempa\relax\else#1\fi\@tempa}
616
- \newcommand\shortcites[1]{%
617
- \@bsphack\@for\@citeb:=#1\do
618
- {\@safe@activestrue
619
- \edef\@citeb{\expandafter\@firstofone\@citeb\@empty}%
620
- \@safe@activesfalse
621
- \global\@namedef{bv@\@citeb\@extra@b@citeb}{}}\@esphack}
622
- \newcommand\NAT@biblabel[1]{\hfill}
623
- \newcommand\NAT@biblabelnum[1]{\bibnumfmt{#1}}
624
- \let\bibnumfmt\@empty
625
- \providecommand\@biblabel[1]{[#1]}
626
- \AtBeginDocument{\ifx\bibnumfmt\@empty\let\bibnumfmt\@biblabel\fi}
627
- \newcommand\NAT@bibsetnum[1]{\settowidth\labelwidth{\@biblabel{#1}}%
628
- \setlength{\leftmargin}{\labelwidth}\addtolength{\leftmargin}{\labelsep}%
629
- \setlength{\itemsep}{\bibsep}\setlength{\parsep}{\z@}%
630
- \ifNAT@openbib
631
- \addtolength{\leftmargin}{\bibindent}%
632
- \setlength{\itemindent}{-\bibindent}%
633
- \setlength{\listparindent}{\itemindent}%
634
- \setlength{\parsep}{0pt}%
635
- \fi
636
- }
637
- \newlength{\bibhang}
638
- \setlength{\bibhang}{1em}
639
- \newlength{\bibsep}
640
- {\@listi \global\bibsep\itemsep \global\advance\bibsep by\parsep}
641
-
642
- \newcommand\NAT@bibsetup%
643
- [1]{\setlength{\leftmargin}{\bibhang}\setlength{\itemindent}{-\leftmargin}%
644
- \setlength{\itemsep}{\bibsep}\setlength{\parsep}{\z@}}
645
- \newcommand\NAT@set@cites{%
646
- \ifNAT@numbers
647
- \ifNAT@super \let\@cite\NAT@citesuper
648
- \def\NAT@mbox##1{\unskip\nobreak\textsuperscript{##1}}%
649
- \let\citeyearpar=\citeyear
650
- \let\NAT@space\relax
651
- \def\NAT@super@kern{\kern\p@}%
652
- \else
653
- \let\NAT@mbox=\mbox
654
- \let\@cite\NAT@citenum
655
- \let\NAT@space\NAT@spacechar
656
- \let\NAT@super@kern\relax
657
- \fi
658
- \let\@citex\NAT@citexnum
659
- \let\@biblabel\NAT@biblabelnum
660
- \let\@bibsetup\NAT@bibsetnum
661
- \renewcommand\NAT@idxtxt{\NAT@name\NAT@spacechar\NAT@open\NAT@num\NAT@close}%
662
- \def\natexlab##1{}%
663
- \def\NAT@penalty{\penalty\@m}%
664
- \else
665
- \let\@cite\NAT@cite
666
- \let\@citex\NAT@citex
667
- \let\@biblabel\NAT@biblabel
668
- \let\@bibsetup\NAT@bibsetup
669
- \let\NAT@space\NAT@spacechar
670
- \let\NAT@penalty\@empty
671
- \renewcommand\NAT@idxtxt{\NAT@name\NAT@spacechar\NAT@open\NAT@date\NAT@close}%
672
- \def\natexlab##1{##1}%
673
- \fi}
674
- \AtBeginDocument{\NAT@set@cites}
675
- \AtBeginDocument{\ifx\SK@def\@undefined\else
676
- \ifx\SK@cite\@empty\else
677
- \SK@def\@citex[#1][#2]#3{\SK@\SK@@ref{#3}\SK@@citex[#1][#2]{#3}}\fi
678
- \ifx\SK@citeauthor\@undefined\def\HAR@checkdef{}\else
679
- \let\citeauthor\SK@citeauthor
680
- \let\citefullauthor\SK@citefullauthor
681
- \let\citeyear\SK@citeyear\fi
682
- \fi}
683
- \newif\ifNAT@full\NAT@fullfalse
684
- \newif\ifNAT@swa
685
- \DeclareRobustCommand\citet
686
- {\begingroup\NAT@swafalse\let\NAT@ctype\z@\NAT@partrue
687
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
688
- \newcommand\NAT@citetp{\@ifnextchar[{\NAT@@citetp}{\NAT@@citetp[]}}
689
- \newcommand\NAT@@citetp{}
690
- \def\NAT@@citetp[#1]{\@ifnextchar[{\@citex[#1]}{\@citex[][#1]}}
691
- \DeclareRobustCommand\citep
692
- {\begingroup\NAT@swatrue\let\NAT@ctype\z@\NAT@partrue
693
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
694
- \DeclareRobustCommand\cite
695
- {\begingroup\let\NAT@ctype\z@\NAT@partrue\NAT@swatrue
696
- \@ifstar{\NAT@fulltrue\NAT@cites}{\NAT@fullfalse\NAT@cites}}
697
- \newcommand\NAT@cites{\@ifnextchar [{\NAT@@citetp}{%
698
- \ifNAT@numbers\else
699
- \NAT@swafalse
700
- \fi
701
- \NAT@@citetp[]}}
702
- \DeclareRobustCommand\citealt
703
- {\begingroup\NAT@swafalse\let\NAT@ctype\z@\NAT@parfalse
704
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
705
- \DeclareRobustCommand\citealp
706
- {\begingroup\NAT@swatrue\let\NAT@ctype\z@\NAT@parfalse
707
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
708
- \DeclareRobustCommand\citenum
709
- {\begingroup
710
- \NAT@swatrue\let\NAT@ctype\z@\NAT@parfalse\let\textsuperscript\NAT@spacechar
711
- \NAT@citexnum[][]}
712
- \DeclareRobustCommand\citeauthor
713
- {\begingroup\NAT@swafalse\let\NAT@ctype\@ne\NAT@parfalse
714
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
715
- \DeclareRobustCommand\Citet
716
- {\begingroup\NAT@swafalse\let\NAT@ctype\z@\NAT@partrue
717
- \let\NAT@up\NAT@Up
718
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
719
- \DeclareRobustCommand\Citep
720
- {\begingroup\NAT@swatrue\let\NAT@ctype\z@\NAT@partrue
721
- \let\NAT@up\NAT@Up
722
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
723
- \DeclareRobustCommand\Citealt
724
- {\begingroup\NAT@swafalse\let\NAT@ctype\z@\NAT@parfalse
725
- \let\NAT@up\NAT@Up
726
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
727
- \DeclareRobustCommand\Citealp
728
- {\begingroup\NAT@swatrue\let\NAT@ctype\z@\NAT@parfalse
729
- \let\NAT@up\NAT@Up
730
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
731
- \DeclareRobustCommand\Citeauthor
732
- {\begingroup\NAT@swafalse\let\NAT@ctype\@ne\NAT@parfalse
733
- \let\NAT@up\NAT@Up
734
- \@ifstar{\NAT@fulltrue\NAT@citetp}{\NAT@fullfalse\NAT@citetp}}
735
- \DeclareRobustCommand\citeyear
736
- {\begingroup\NAT@swafalse\let\NAT@ctype\tw@\NAT@parfalse\NAT@citetp}
737
- \DeclareRobustCommand\citeyearpar
738
- {\begingroup\NAT@swatrue\let\NAT@ctype\tw@\NAT@partrue\NAT@citetp}
739
- \newcommand\citetext[1]{\NAT@open#1\NAT@close}
740
- \DeclareRobustCommand\citefullauthor
741
- {\citeauthor*}
742
- \newcommand\defcitealias[2]{%
743
- \@ifundefined{al@#1\@extra@b@citeb}{}
744
- {\PackageWarning{natbib}{Overwriting existing alias for citation #1}}
745
- \@namedef{al@#1\@extra@b@citeb}{#2}}
746
- \DeclareRobustCommand\citetalias{\begingroup
747
- \NAT@swafalse\let\NAT@ctype\thr@@\NAT@parfalse\NAT@citetp}
748
- \DeclareRobustCommand\citepalias{\begingroup
749
- \NAT@swatrue\let\NAT@ctype\thr@@\NAT@partrue\NAT@citetp}
750
- \renewcommand\nocite[1]{\@bsphack
751
- \@for\@citeb:=#1\do{%
752
- \@safe@activestrue
753
- \edef\@citeb{\expandafter\@firstofone\@citeb\@empty}%
754
- \@safe@activesfalse
755
- \if@filesw\immediate\write\@auxout{\string\citation{\@citeb}}\fi
756
- \if*\@citeb\else
757
- \@ifundefined{b@\@citeb\@extra@b@citeb}{%
758
- \NAT@citeundefined \PackageWarning{natbib}%
759
- {Citation `\@citeb' undefined}}{}\fi}%
760
- \@esphack}
761
- \newcommand\NAT@parse[1]{%
762
- \begingroup
763
- \let\protect=\@unexpandable@protect
764
- \let~\relax
765
- \let\active@prefix=\@gobble
766
- \edef\NAT@temp{\csname b@#1\@extra@b@citeb\endcsname}%
767
- \aftergroup\NAT@split
768
- \expandafter
769
- \endgroup
770
- \NAT@temp{}{}{}{}{}@@%
771
- \expandafter\NAT@parse@date\NAT@date??????@@%
772
- \ifciteindex\NAT@index\fi
773
- }%
774
- \def\NAT@split#1#2#3#4#5@@{%
775
- \gdef\NAT@num{#1}\gdef\NAT@name{#3}\gdef\NAT@date{#2}%
776
- \gdef\NAT@all@names{#4}%
777
- \ifx\NAT@num\@empty\gdef\NAT@num{0}\fi
778
- \ifx\NAT@noname\NAT@all@names \gdef\NAT@all@names{#3}\fi
779
- }%
780
- \def\NAT@reset@parser{%
781
- \global\let\NAT@num\@empty
782
- \global\let\NAT@name\@empty
783
- \global\let\NAT@date\@empty
784
- \global\let\NAT@all@names\@empty
785
- }%
786
- \newcommand\NAT@parse@date{}
787
- \def\NAT@parse@date#1#2#3#4#5#6@@{%
788
- \ifnum\the\catcode`#1=11\def\NAT@year{}\def\NAT@exlab{#1}\else
789
- \ifnum\the\catcode`#2=11\def\NAT@year{#1}\def\NAT@exlab{#2}\else
790
- \ifnum\the\catcode`#3=11\def\NAT@year{#1#2}\def\NAT@exlab{#3}\else
791
- \ifnum\the\catcode`#4=11\def\NAT@year{#1#2#3}\def\NAT@exlab{#4}\else
792
- \def\NAT@year{#1#2#3#4}\def\NAT@exlab{{#5}}\fi\fi\fi\fi}
793
- \newcommand\NAT@index{}
794
- \let\NAT@makeindex=\makeindex
795
- \renewcommand\makeindex{\NAT@makeindex
796
- \renewcommand\NAT@index{\@bsphack\begingroup
797
- \def~{\string~}\@wrindex{\NAT@idxtxt}}}
798
- \newcommand\NAT@idxtxt{\NAT@name\NAT@spacechar\NAT@open\NAT@date\NAT@close}
799
- \@ifxundefined\@indexfile{}{\let\NAT@makeindex\relax\makeindex}
800
- \newif\ifciteindex \citeindexfalse
801
- \newcommand\citeindextype{default}
802
- \newcommand\NAT@index@alt{{\let\protect=\noexpand\let~\relax
803
- \xdef\NAT@temp{\NAT@idxtxt}}\expandafter\NAT@exp\NAT@temp\@nil}
804
- \newcommand\NAT@exp{}
805
- \def\NAT@exp#1\@nil{\index[\citeindextype]{#1}}
806
-
807
- \AtBeginDocument{%
808
- \@ifpackageloaded{index}{\let\NAT@index=\NAT@index@alt}{}}
809
- \newcommand\NAT@ifcmd{\futurelet\NAT@temp\NAT@ifxcmd}
810
- \newcommand\NAT@ifxcmd{\ifx\NAT@temp\relax\else\expandafter\NAT@bare\fi}
811
- \def\NAT@bare#1(#2)#3(@)#4\@nil#5{%
812
- \if @#2
813
- \expandafter\NAT@apalk#1, , \@nil{#5}%
814
- \else
815
- \NAT@wrout{\the\c@NAT@ctr}{#2}{#1}{#3}{#5}%
816
- \fi
817
- }
818
- \newcommand\NAT@wrout[5]{%
819
- \if@filesw
820
- {\let\protect\noexpand\let~\relax
821
- \immediate
822
- \write\@auxout{\string\bibcite{#5}{{#1}{#2}{{#3}}{{#4}}}}}\fi
823
- \ignorespaces}
824
- \def\NAT@noname{{}}
825
- \renewcommand\bibitem{\@ifnextchar[{\@lbibitem}{\@lbibitem[]}}%
826
- \let\NAT@bibitem@first@sw\@secondoftwo
827
- \def\@lbibitem[#1]#2{%
828
- \if\relax\@extra@b@citeb\relax\else
829
- \@ifundefined{br@#2\@extra@b@citeb}{}{%
830
- \@namedef{br@#2}{\@nameuse{br@#2\@extra@b@citeb}}%
831
- }%
832
- \fi
833
- \@ifundefined{b@#2\@extra@b@citeb}{%
834
- \def\NAT@num{}%
835
- }{%
836
- \NAT@parse{#2}%
837
- }%
838
- \def\NAT@tmp{#1}%
839
- \expandafter\let\expandafter\bibitemOpen\csname NAT@b@open@#2\endcsname
840
- \expandafter\let\expandafter\bibitemShut\csname NAT@b@shut@#2\endcsname
841
- \@ifnum{\NAT@merge>\@ne}{%
842
- \NAT@bibitem@first@sw{%
843
- \@firstoftwo
844
- }{%
845
- \@ifundefined{NAT@b*@#2}{%
846
- \@firstoftwo
847
- }{%
848
- \expandafter\def\expandafter\NAT@num\expandafter{\the\c@NAT@ctr}%
849
- \@secondoftwo
850
- }%
851
- }%
852
- }{%
853
- \@firstoftwo
854
- }%
855
- {%
856
- \global\advance\c@NAT@ctr\@ne
857
- \@ifx{\NAT@tmp\@empty}{\@firstoftwo}{%
858
- \@secondoftwo
859
- }%
860
- {%
861
- \expandafter\def\expandafter\NAT@num\expandafter{\the\c@NAT@ctr}%
862
- \global\NAT@stdbsttrue
863
- }{}%
864
- \bibitem@fin
865
- \item[\hfil\NAT@anchor{#2}{\NAT@num}]%
866
- \global\let\NAT@bibitem@first@sw\@secondoftwo
867
- \NAT@bibitem@init
868
- }%
869
- {%
870
- \NAT@anchor{#2}{}%
871
- \NAT@bibitem@cont
872
- \bibitem@fin
873
- }%
874
- \@ifx{\NAT@tmp\@empty}{%
875
- \NAT@wrout{\the\c@NAT@ctr}{}{}{}{#2}%
876
- }{%
877
- \expandafter\NAT@ifcmd\NAT@tmp(@)(@)\@nil{#2}%
878
- }%
879
- }%
880
- \def\bibitem@fin{%
881
- \@ifxundefined\@bibstop{}{\csname bibitem@\@bibstop\endcsname}%
882
- }%
883
- \def\NAT@bibitem@init{%
884
- \let\@bibstop\@undefined
885
- }%
886
- \def\NAT@bibitem@cont{%
887
- \let\bibitem@Stop\bibitemStop
888
- \let\bibitem@NoStop\bibitemContinue
889
- }%
890
- \def\BibitemOpen{%
891
- \bibitemOpen
892
- }%
893
- \def\BibitemShut#1{%
894
- \bibitemShut
895
- \def\@bibstop{#1}%
896
- \let\bibitem@Stop\bibitemStop
897
- \let\bibitem@NoStop\bibitemNoStop
898
- }%
899
- \def\bibitemStop{}%
900
- \def\bibitemNoStop{.\spacefactor\@mmm\space}%
901
- \def\bibitemContinue{\spacefactor\@mmm\space}%
902
- \mathchardef\@mmm=3000 %
903
- \providecommand{\bibAnnote}[3]{%
904
- \BibitemShut{#1}%
905
- \def\@tempa{#3}\@ifx{\@tempa\@empty}{}{%
906
- \begin{quotation}\noindent
907
- \textsc{Key:}\ #2\\\textsc{Annotation:}\ \@tempa
908
- \end{quotation}%
909
- }%
910
- }%
911
- \providecommand{\bibAnnoteFile}[2]{%
912
- \IfFileExists{#2}{%
913
- \bibAnnote{#1}{#2}{\input{#2}}%
914
- }{%
915
- \bibAnnote{#1}{#2}{}%
916
- }%
917
- }%
918
- \let\bibitemOpen\relax
919
- \let\bibitemShut\relax
920
- \def\bibfield{\@ifnum{\NAT@merge>\tw@}{\@bibfield}{\@secondoftwo}}%
921
- \def\@bibfield#1#2{%
922
- \begingroup
923
- \let\Doi\@gobble
924
- \let\bibinfo\relax
925
- \let\restore@protect\@empty
926
- \protected@edef\@tempa{#2}%
927
- \aftergroup\def\aftergroup\@tempa
928
- \expandafter\endgroup\expandafter{\@tempa}%
929
- \expandafter\@ifx\expandafter{\csname @bib#1\endcsname\@tempa}{%
930
- \expandafter\let\expandafter\@tempa\csname @bib@X#1\endcsname
931
- }{%
932
- \expandafter\let\csname @bib#1\endcsname\@tempa
933
- \expandafter\let\expandafter\@tempa\csname @bib@Y#1\endcsname
934
- }%
935
- \@ifx{\@tempa\relax}{\let\@tempa\@firstofone}{}%
936
- \@tempa{#2}%
937
- }%
938
- \def\bibinfo#1{%
939
- \expandafter\let\expandafter\@tempa\csname bibinfo@X@#1\endcsname
940
- \@ifx{\@tempa\relax}{\@firstofone}{\@tempa}%
941
- }%
942
- \def\@bib@Xauthor#1{\let\@bib@Xjournal\@gobble}%
943
- \def\@bib@Xjournal#1{\begingroup\let\bibinfo@X@journal\@bib@Z@journal#1\endgroup}%
944
- \def\@bibibid@#1{\textit{ibid}.}%
945
- \appdef\NAT@bibitem@init{%
946
- \let\@bibauthor \@empty
947
- \let\@bibjournal \@empty
948
- \let\@bib@Z@journal\@bibibid@
949
- }%
950
- \ifx\SK@lbibitem\@undefined\else
951
- \let\SK@lbibitem\@lbibitem
952
- \def\@lbibitem[#1]#2{%
953
- \SK@lbibitem[#1]{#2}\SK@\SK@@label{#2}\ignorespaces}\fi
954
- \newif\ifNAT@stdbst \NAT@stdbstfalse
955
-
956
- \AtEndDocument{%
957
- \ifNAT@stdbst\if@filesw
958
- \immediate\write\@auxout{%
959
- \string\providecommand\string\NAT@force@numbers{}%
960
- \string\NAT@force@numbers
961
- }%
962
- \fi\fi
963
- }
964
- \newcommand\NAT@force@numbers{%
965
- \ifNAT@numbers\else
966
- \PackageError{natbib}{Bibliography not compatible with author-year
967
- citations.\MessageBreak
968
- Press <return> to continue in numerical citation style}
969
- {Check the bibliography entries for non-compliant syntax,\MessageBreak
970
- or select author-year BibTeX style, e.g. plainnat}%
971
- \global\NAT@numberstrue\fi}
972
-
973
- \providecommand\bibcite{}
974
- \renewcommand\bibcite[2]{%
975
- \@ifundefined{b@#1\@extra@binfo}{\relax}{%
976
- \NAT@citemultiple
977
- \PackageWarningNoLine{natbib}{Citation `#1' multiply defined}%
978
- }%
979
- \global\@namedef{b@#1\@extra@binfo}{#2}%
980
- }%
981
- \AtEndDocument{\NAT@swatrue\let\bibcite\NAT@testdef}
982
- \newcommand\NAT@testdef[2]{%
983
- \def\NAT@temp{#2}%
984
- \expandafter \ifx \csname b@#1\@extra@binfo\endcsname\NAT@temp
985
- \else
986
- \ifNAT@swa \NAT@swafalse
987
- \PackageWarningNoLine{natbib}{%
988
- Citation(s) may have changed.\MessageBreak
989
- Rerun to get citations correct%
990
- }%
991
- \fi
992
- \fi
993
- }%
994
- \newcommand\NAT@apalk{}
995
- \def\NAT@apalk#1, #2, #3\@nil#4{%
996
- \if\relax#2\relax
997
- \global\NAT@stdbsttrue
998
- \NAT@wrout{#1}{}{}{}{#4}%
999
- \else
1000
- \NAT@wrout{\the\c@NAT@ctr}{#2}{#1}{}{#4}%
1001
- \fi
1002
- }%
1003
- \newcommand\citeauthoryear{}
1004
- \def\citeauthoryear#1#2#3(@)(@)\@nil#4{%
1005
- \if\relax#3\relax
1006
- \NAT@wrout{\the\c@NAT@ctr}{#2}{#1}{}{#4}%
1007
- \else
1008
- \NAT@wrout{\the\c@NAT@ctr}{#3}{#2}{#1}{#4}%
1009
- \fi
1010
- }%
1011
- \newcommand\citestarts{\NAT@open}%
1012
- \newcommand\citeends{\NAT@close}%
1013
- \newcommand\betweenauthors{and}%
1014
- \newcommand\astroncite{}
1015
- \def\astroncite#1#2(@)(@)\@nil#3{%
1016
- \NAT@wrout{\the\c@NAT@ctr}{#2}{#1}{}{#3}%
1017
- }%
1018
- \newcommand\citename{}
1019
- \def\citename#1#2(@)(@)\@nil#3{\expandafter\NAT@apalk#1#2, \@nil{#3}}
1020
- \newcommand\harvarditem[4][]{%
1021
- \if\relax#1\relax
1022
- \bibitem[#2(#3)]{#4}%
1023
- \else
1024
- \bibitem[#1(#3)#2]{#4}%
1025
- \fi
1026
- }%
1027
- \newcommand\harvardleft{\NAT@open}
1028
- \newcommand\harvardright{\NAT@close}
1029
- \newcommand\harvardyearleft{\NAT@open}
1030
- \newcommand\harvardyearright{\NAT@close}
1031
- \AtBeginDocument{\providecommand{\harvardand}{and}}
1032
- \newcommand\harvardurl[1]{\textbf{URL:} \textit{#1}}
1033
- \providecommand\bibsection{}
1034
- \@ifundefined{chapter}{%
1035
- \renewcommand\bibsection{%
1036
- \section*{\refname\@mkboth{\MakeUppercase{\refname}}{\MakeUppercase{\refname}}}%
1037
- }%
1038
- }{%
1039
- \@ifxundefined\NAT@sectionbib{%
1040
- \renewcommand\bibsection{%
1041
- \chapter*{\bibname\@mkboth{\MakeUppercase{\bibname}}{\MakeUppercase{\bibname}}}%
1042
- }%
1043
- }{%
1044
- \renewcommand\bibsection{%
1045
- \section*{\bibname\ifx\@mkboth\@gobbletwo\else\markright{\MakeUppercase{\bibname}}\fi}%
1046
- }%
1047
- }%
1048
- }%
1049
- \@ifclassloaded{amsart}{\renewcommand\bibsection{\section*{\refname}}}{}%
1050
- \@ifclassloaded{amsbook}{\renewcommand\bibsection{\chapter*{\bibname}}}{}%
1051
- \@ifxundefined\bib@heading{}{\let\bibsection\bib@heading}%
1052
- \newcounter{NAT@ctr}
1053
- \renewenvironment{thebibliography}[1]{%
1054
- \bibsection
1055
- \parindent\z@
1056
- \bibpreamble
1057
- \bibfont
1058
- \list{\@biblabel{\the\c@NAT@ctr}}{\@bibsetup{#1}\global\c@NAT@ctr\z@}%
1059
- \ifNAT@openbib
1060
- \renewcommand\newblock{\par}%
1061
- \else
1062
- \renewcommand\newblock{\hskip .11em \@plus.33em \@minus.07em}%
1063
- \fi
1064
- \sloppy\clubpenalty4000\widowpenalty4000
1065
- \sfcode`\.\@m
1066
- \let\NAT@bibitem@first@sw\@firstoftwo
1067
- \let\citeN\cite \let\shortcite\cite
1068
- \let\citeasnoun\cite
1069
- }{%
1070
- \bibitem@fin
1071
- \bibpostamble
1072
- \def\@noitemerr{%
1073
- \PackageWarning{natbib}{Empty `thebibliography' environment}%
1074
- }%
1075
- \endlist
1076
- \bibcleanup
1077
- }%
1078
- \let\bibfont\@empty
1079
- \let\bibpreamble\@empty
1080
- \let\bibpostamble\@empty
1081
- \def\bibcleanup{\vskip-\lastskip}%
1082
- \providecommand\reset@font{\relax}
1083
- \providecommand\bibname{Bibliography}
1084
- \providecommand\refname{References}
1085
- \newcommand\NAT@citeundefined{\gdef \NAT@undefined {%
1086
- \PackageWarningNoLine{natbib}{There were undefined citations}}}
1087
- \let \NAT@undefined \relax
1088
- \newcommand\NAT@citemultiple{\gdef \NAT@multiple {%
1089
- \PackageWarningNoLine{natbib}{There were multiply defined citations}}}
1090
- \let \NAT@multiple \relax
1091
- \AtEndDocument{\NAT@undefined\NAT@multiple}
1092
- \providecommand\@mkboth[2]{}
1093
- \providecommand\MakeUppercase{\uppercase}
1094
- \providecommand{\@extra@b@citeb}{}
1095
- \gdef\@extra@binfo{}
1096
- \def\NAT@anchor#1#2{%
1097
- \hyper@natanchorstart{#1\@extra@b@citeb}%
1098
- \def\@tempa{#2}\@ifx{\@tempa\@empty}{}{\@biblabel{#2}}%
1099
- \hyper@natanchorend
1100
- }%
1101
- \providecommand\hyper@natanchorstart[1]{}%
1102
- \providecommand\hyper@natanchorend{}%
1103
- \providecommand\hyper@natlinkstart[1]{}%
1104
- \providecommand\hyper@natlinkend{}%
1105
- \providecommand\hyper@natlinkbreak[2]{#1}%
1106
- \AtBeginDocument{%
1107
- \@ifpackageloaded{babel}{%
1108
- \let\org@@citex\@citex}{}}
1109
- \providecommand\@safe@activestrue{}%
1110
- \providecommand\@safe@activesfalse{}%
1111
-
1112
- \newcommand\NAT@sort@cites[1]{%
1113
- \let\NAT@cite@list\@empty
1114
- \@for\@citeb:=#1\do{\expandafter\NAT@star@cite\@citeb\@@}%
1115
- \if@filesw
1116
- \expandafter\immediate\expandafter\write\expandafter\@auxout
1117
- \expandafter{\expandafter\string\expandafter\citation\expandafter{\NAT@cite@list}}%
1118
- \fi
1119
- \@ifnum{\NAT@sort>\z@}{%
1120
- \expandafter\NAT@sort@cites@\expandafter{\NAT@cite@list}%
1121
- }{}%
1122
- }%
1123
- \def\NAT@star@cite{%
1124
- \let\NAT@star@sw\@secondoftwo
1125
- \@ifnum{\NAT@merge>\z@}{%
1126
- \@ifnextchar*{%
1127
- \let\NAT@star@sw\@firstoftwo
1128
- \NAT@star@cite@star
1129
- }{%
1130
- \NAT@star@cite@nostar
1131
- }%
1132
- }{%
1133
- \NAT@star@cite@noextension
1134
- }%
1135
- }%
1136
- \def\NAT@star@cite@star*{%
1137
- \NAT@star@cite@nostar
1138
- }%
1139
- \def\NAT@star@cite@nostar{%
1140
- \let\nat@keyopt@open\@empty
1141
- \let\nat@keyopt@shut\@empty
1142
- \@ifnextchar[{\NAT@star@cite@pre}{\NAT@star@cite@pre[]}%
1143
- }%
1144
- \def\NAT@star@cite@pre[#1]{%
1145
- \def\nat@keyopt@open{#1}%
1146
- \@ifnextchar[{\NAT@star@cite@post}{\NAT@star@cite@post[]}%
1147
- }%
1148
- \def\NAT@star@cite@post[#1]#2\@@{%
1149
- \def\nat@keyopt@shut{#1}%
1150
- \NAT@star@sw{\expandafter\global\expandafter\let\csname NAT@b*@#2\endcsname\@empty}{}%
1151
- \NAT@cite@list@append{#2}%
1152
- }%
1153
- \def\NAT@star@cite@noextension#1\@@{%
1154
- \let\nat@keyopt@open\@empty
1155
- \let\nat@keyopt@shut\@empty
1156
- \NAT@cite@list@append{#1}%
1157
- }%
1158
- \def\NAT@cite@list@append#1{%
1159
- \edef\@citeb{\@firstofone#1\@empty}%
1160
- \if@filesw\@ifxundefined\@cprwrite{}{\expandafter\@cprwrite\@citeb=}\fi
1161
- \if\relax\nat@keyopt@open\relax\else
1162
- \global\expandafter\let\csname NAT@b@open@\@citeb\endcsname\nat@keyopt@open
1163
- \fi
1164
- \if\relax\nat@keyopt@shut\relax\else
1165
- \global\expandafter\let\csname NAT@b@shut@\@citeb\endcsname\nat@keyopt@shut
1166
- \fi
1167
- \toks@\expandafter{\NAT@cite@list}%
1168
- \ifx\NAT@cite@list\@empty
1169
- \@temptokena\expandafter{\@citeb}%
1170
- \else
1171
- \@temptokena\expandafter{\expandafter,\@citeb}%
1172
- \fi
1173
- \edef\NAT@cite@list{\the\toks@\the\@temptokena}%
1174
- }%
1175
- \newcommand\NAT@sort@cites@[1]{%
1176
- \count@\z@
1177
- \@tempcntb\m@ne
1178
- \let\@celt\delimiter
1179
- \def\NAT@num@list{}%
1180
- \let\NAT@cite@list\@empty
1181
- \let\NAT@nonsort@list\@empty
1182
- \@for \@citeb:=#1\do{\NAT@make@cite@list}%
1183
- \ifx\NAT@nonsort@list\@empty\else
1184
- \protected@edef\NAT@cite@list{\NAT@cite@list\NAT@nonsort@list}%
1185
- \fi
1186
- \ifx\NAT@cite@list\@empty\else
1187
- \protected@edef\NAT@cite@list{\expandafter\NAT@xcom\NAT@cite@list @@}%
1188
- \fi
1189
- }%
1190
- \def\NAT@make@cite@list{%
1191
- \advance\count@\@ne
1192
- \@safe@activestrue
1193
- \edef\@citeb{\expandafter\@firstofone\@citeb\@empty}%
1194
- \@safe@activesfalse
1195
- \@ifundefined{b@\@citeb\@extra@b@citeb}%
1196
- {\def\NAT@num{A}}%
1197
- {\NAT@parse{\@citeb}}%
1198
- \NAT@ifcat@num\NAT@num
1199
- {\@tempcnta\NAT@num \relax
1200
- \@ifnum{\@tempcnta<\@tempcntb}{%
1201
- \let\NAT@@cite@list=\NAT@cite@list
1202
- \let\NAT@cite@list\@empty
1203
- \begingroup\let\@celt=\NAT@celt\NAT@num@list\endgroup
1204
- \protected@edef\NAT@num@list{%
1205
- \expandafter\NAT@num@celt \NAT@num@list \@gobble @%
1206
- }%
1207
- }{%
1208
- \protected@edef\NAT@num@list{\NAT@num@list \@celt{\NAT@num}}%
1209
- \protected@edef\NAT@cite@list{\NAT@cite@list\@citeb,}%
1210
- \@tempcntb\@tempcnta
1211
- }%
1212
- }%
1213
- {\protected@edef\NAT@nonsort@list{\NAT@nonsort@list\@citeb,}}%
1214
- }%
1215
- \def\NAT@celt#1{%
1216
- \@ifnum{#1>\@tempcnta}{%
1217
- \xdef\NAT@cite@list{\NAT@cite@list\@citeb,\NAT@@cite@list}%
1218
- \let\@celt\@gobble
1219
- }{%
1220
- \expandafter\def@NAT@cite@lists\NAT@@cite@list\@@
1221
- }%
1222
- }%
1223
- \def\NAT@num@celt#1#2{%
1224
- \ifx#1\@celt
1225
- \@ifnum{#2>\@tempcnta}{%
1226
- \@celt{\number\@tempcnta}%
1227
- \@celt{#2}%
1228
- }{%
1229
- \@celt{#2}%
1230
- \expandafter\NAT@num@celt
1231
- }%
1232
- \fi
1233
- }%
1234
- \def\def@NAT@cite@lists#1,#2\@@{%
1235
- \xdef\NAT@cite@list{\NAT@cite@list#1,}%
1236
- \xdef\NAT@@cite@list{#2}%
1237
- }%
1238
- \def\NAT@nextc#1,#2@@{#1,}
1239
- \def\NAT@restc#1,#2{#2}
1240
- \def\NAT@xcom#1,@@{#1}
1241
- \InputIfFileExists{natbib.cfg}
1242
- {\typeout{Local config file natbib.cfg used}}{}
1243
- %%
1244
- %% <<<<< End of generated file <<<<<<
1245
- %%
1246
- %% End of file `natbib.sty'.
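The deleted natbib.sty above supplies the author-year citation commands (\citet, \citep, \citealt, \citeauthor, \citeyear) that the generated papers rely on. As a minimal illustrative sketch only, not taken from this repository, and assuming a hypothetical references.bib containing a key krishnamurthy2015rl, these commands are typically used like so:

\documentclass{article}
\usepackage{natbib}            % the package whose source is deleted above
\bibliographystyle{plainnat}   % an author-year BibTeX style compatible with natbib

\begin{document}
% \citet produces a textual citation, e.g. "Krishnamurthy (2015)"
\citet{krishnamurthy2015rl} formulate reinforcement learning via Markov decision processes.
% \citep produces a parenthetical citation, e.g. "(Krishnamurthy, 2015)"
Q-learning is a standard model-free method \citep{krishnamurthy2015rl}.
\bibliography{references}      % hypothetical references.bib containing the key above
\end{document}
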
outputs/outputs_20230421_000752/ref.bib DELETED
@@ -1,1908 +0,0 @@
1
- @article{1512.07669,
2
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
3
- Decision Processes},
4
- author = {Vikram Krishnamurthy},
5
- journal={arXiv preprint arXiv:1512.07669},
6
- year = {2015},
7
- url = {http://arxiv.org/abs/1512.07669v1}
8
- }
9
-
10
- @article{1512.07669,
11
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
12
- Decision Processes},
13
- author = {Vikram Krishnamurthy},
14
- journal={arXiv preprint arXiv:1512.07669},
15
- year = {2015},
16
- url = {http://arxiv.org/abs/1512.07669v1}
17
- }
18
-
19
- @article{1511.02377,
20
- title = {The Value Functions of Markov Decision Processes},
21
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
22
- journal={arXiv preprint arXiv:1511.02377},
23
- year = {2015},
24
- url = {http://arxiv.org/abs/1511.02377v1}
25
- }
26
-
27
- @article{1512.07669,
28
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
29
- Decision Processes},
30
- author = {Vikram Krishnamurthy},
31
- journal={arXiv preprint arXiv:1512.07669},
32
- year = {2015},
33
- url = {http://arxiv.org/abs/1512.07669v1}
34
- }
35
-
36
- @article{1511.02377,
37
- title = {The Value Functions of Markov Decision Processes},
38
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
39
- journal={arXiv preprint arXiv:1511.02377},
40
- year = {2015},
41
- url = {http://arxiv.org/abs/1511.02377v1}
42
- }
43
-
44
- @article{1512.09075,
45
- title = {A Notation for Markov Decision Processes},
46
- author = {Philip S. Thomas , Billy Okal},
47
- journal={arXiv preprint arXiv:1512.09075},
48
- year = {2015},
49
- url = {http://arxiv.org/abs/1512.09075v2}
50
- }
51
-
52
- @article{1512.07669,
53
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
54
- Decision Processes},
55
- author = {Vikram Krishnamurthy},
56
- journal={arXiv preprint arXiv:1512.07669},
57
- year = {2015},
58
- url = {http://arxiv.org/abs/1512.07669v1}
59
- }
60
-
61
- @article{1511.02377,
62
- title = {The Value Functions of Markov Decision Processes},
63
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
64
- journal={arXiv preprint arXiv:1511.02377},
65
- year = {2015},
66
- url = {http://arxiv.org/abs/1511.02377v1}
67
- }
68
-
69
- @article{1512.09075,
70
- title = {A Notation for Markov Decision Processes},
71
- author = {Philip S. Thomas , Billy Okal},
72
- journal={arXiv preprint arXiv:1512.09075},
73
- year = {2015},
74
- url = {http://arxiv.org/abs/1512.09075v2}
75
- }
76
-
77
- @article{2008.10426,
78
- title = {Taming denumerable Markov decision processes with decisiveness},
79
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
80
- journal={arXiv preprint arXiv:2008.10426},
81
- year = {2020},
82
- url = {http://arxiv.org/abs/2008.10426v1}
83
- }
84
-
85
- @article{1512.07669,
86
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
87
- Decision Processes},
88
- author = {Vikram Krishnamurthy},
89
- journal={arXiv preprint arXiv:1512.07669},
90
- year = {2015},
91
- url = {http://arxiv.org/abs/1512.07669v1}
92
- }
93
-
94
- @article{1511.02377,
95
- title = {The Value Functions of Markov Decision Processes},
96
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
97
- journal={arXiv preprint arXiv:1511.02377},
98
- year = {2015},
99
- url = {http://arxiv.org/abs/1511.02377v1}
100
- }
101
-
102
- @article{1512.09075,
103
- title = {A Notation for Markov Decision Processes},
104
- author = {Philip S. Thomas , Billy Okal},
105
- journal={arXiv preprint arXiv:1512.09075},
106
- year = {2015},
107
- url = {http://arxiv.org/abs/1512.09075v2}
108
- }
109
-
110
- @article{2008.10426,
111
- title = {Taming denumerable Markov decision processes with decisiveness},
112
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
113
- journal={arXiv preprint arXiv:2008.10426},
114
- year = {2020},
115
- url = {http://arxiv.org/abs/2008.10426v1}
116
- }
117
-
118
- @article{0711.2185,
119
- title = {Exact finite approximations of average-cost countable Markov Decision
120
- Processes},
121
- author = {Arie Leizarowitz , Adam Shwartz},
122
- journal={arXiv preprint arXiv:0711.2185},
123
- year = {2007},
124
- url = {http://arxiv.org/abs/0711.2185v1}
125
- }
126
-
127
- @article{1512.07669,
128
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
129
- Decision Processes},
130
- author = {Vikram Krishnamurthy},
131
- journal={arXiv preprint arXiv:1512.07669},
132
- year = {2015},
133
- url = {http://arxiv.org/abs/1512.07669v1}
134
- }
135
-
136
- @article{1511.02377,
137
- title = {The Value Functions of Markov Decision Processes},
138
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
139
- journal={arXiv preprint arXiv:1511.02377},
140
- year = {2015},
141
- url = {http://arxiv.org/abs/1511.02377v1}
142
- }
143
-
144
- @article{1512.09075,
145
- title = {A Notation for Markov Decision Processes},
146
- author = {Philip S. Thomas , Billy Okal},
147
- journal={arXiv preprint arXiv:1512.09075},
148
- year = {2015},
149
- url = {http://arxiv.org/abs/1512.09075v2}
150
- }
151
-
152
- @article{2008.10426,
153
- title = {Taming denumerable Markov decision processes with decisiveness},
154
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
155
- journal={arXiv preprint arXiv:2008.10426},
156
- year = {2020},
157
- url = {http://arxiv.org/abs/2008.10426v1}
158
- }
159
-
160
- @article{0711.2185,
161
- title = {Exact finite approximations of average-cost countable Markov Decision
162
- Processes},
163
- author = {Arie Leizarowitz , Adam Shwartz},
164
- journal={arXiv preprint arXiv:0711.2185},
165
- year = {2007},
166
- url = {http://arxiv.org/abs/0711.2185v1}
167
- }
168
-
169
- @article{2303.08631,
170
- title = {Smoothed Q-learning},
171
- author = {David Barber},
172
- journal={arXiv preprint arXiv:2303.08631},
173
- year = {2023},
174
- url = {http://arxiv.org/abs/2303.08631v1}
175
- }
176
-
177
- @article{1512.07669,
178
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
179
- Decision Processes},
180
- author = {Vikram Krishnamurthy},
181
- journal={arXiv preprint arXiv:1512.07669},
182
- year = {2015},
183
- url = {http://arxiv.org/abs/1512.07669v1}
184
- }
185
-
186
- @article{1511.02377,
187
- title = {The Value Functions of Markov Decision Processes},
188
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
189
- journal={arXiv preprint arXiv:1511.02377},
190
- year = {2015},
191
- url = {http://arxiv.org/abs/1511.02377v1}
192
- }
193
-
194
- @article{1512.09075,
195
- title = {A Notation for Markov Decision Processes},
196
- author = {Philip S. Thomas , Billy Okal},
197
- journal={arXiv preprint arXiv:1512.09075},
198
- year = {2015},
199
- url = {http://arxiv.org/abs/1512.09075v2}
200
- }
201
-
202
- @article{2008.10426,
203
- title = {Taming denumerable Markov decision processes with decisiveness},
204
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
205
- journal={arXiv preprint arXiv:2008.10426},
206
- year = {2020},
207
- url = {http://arxiv.org/abs/2008.10426v1}
208
- }
209
-
210
- @article{0711.2185,
211
- title = {Exact finite approximations of average-cost countable Markov Decision
212
- Processes},
213
- author = {Arie Leizarowitz , Adam Shwartz},
214
- journal={arXiv preprint arXiv:0711.2185},
215
- year = {2007},
216
- url = {http://arxiv.org/abs/0711.2185v1}
217
- }
218
-
219
- @article{2303.08631,
220
- title = {Smoothed Q-learning},
221
- author = {David Barber},
222
- journal={arXiv preprint arXiv:2303.08631},
223
- year = {2023},
224
- url = {http://arxiv.org/abs/2303.08631v1}
225
- }
226
-
227
- @article{2106.14642,
228
- title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values
229
- from Offline Expert Examples},
230
- author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad},
231
- journal={arXiv preprint arXiv:2106.14642},
232
- year = {2021},
233
- url = {http://arxiv.org/abs/2106.14642v3}
234
- }
235
-
236
- @article{1512.07669,
237
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
238
- Decision Processes},
239
- author = {Vikram Krishnamurthy},
240
- journal={arXiv preprint arXiv:1512.07669},
241
- year = {2015},
242
- url = {http://arxiv.org/abs/1512.07669v1}
243
- }
244
-
245
- @article{1511.02377,
246
- title = {The Value Functions of Markov Decision Processes},
247
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
248
- journal={arXiv preprint arXiv:1511.02377},
249
- year = {2015},
250
- url = {http://arxiv.org/abs/1511.02377v1}
251
- }
252
-
253
- @article{1512.09075,
254
- title = {A Notation for Markov Decision Processes},
255
- author = {Philip S. Thomas , Billy Okal},
256
- journal={arXiv preprint arXiv:1512.09075},
257
- year = {2015},
258
- url = {http://arxiv.org/abs/1512.09075v2}
259
- }
260
-
261
- @article{2008.10426,
262
- title = {Taming denumerable Markov decision processes with decisiveness},
263
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
264
- journal={arXiv preprint arXiv:2008.10426},
265
- year = {2020},
266
- url = {http://arxiv.org/abs/2008.10426v1}
267
- }
268
-
269
- @article{0711.2185,
270
- title = {Exact finite approximations of average-cost countable Markov Decision
271
- Processes},
272
- author = {Arie Leizarowitz , Adam Shwartz},
273
- journal={arXiv preprint arXiv:0711.2185},
274
- year = {2007},
275
- url = {http://arxiv.org/abs/0711.2185v1}
276
- }
277
-
278
- @article{2303.08631,
279
- title = {Smoothed Q-learning},
280
- author = {David Barber},
281
- journal={arXiv preprint arXiv:2303.08631},
282
- year = {2023},
283
- url = {http://arxiv.org/abs/2303.08631v1}
284
- }
285
-
286
- @article{2106.14642,
287
- title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values
288
- from Offline Expert Examples},
289
- author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad},
290
- journal={arXiv preprint arXiv:2106.14642},
291
- year = {2021},
292
- url = {http://arxiv.org/abs/2106.14642v3}
293
- }
294
-
295
- @article{2106.01134,
296
- title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity},
297
- author = {Wei Liao , Xiaohui Wei , Jizhou Lai},
298
- journal={arXiv preprint arXiv:2106.01134},
299
- year = {2021},
300
- url = {http://arxiv.org/abs/2106.01134v1}
301
- }
302
-
303
- @article{1512.07669,
304
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
305
- Decision Processes},
306
- author = {Vikram Krishnamurthy},
307
- journal={arXiv preprint arXiv:1512.07669},
308
- year = {2015},
309
- url = {http://arxiv.org/abs/1512.07669v1}
310
- }
311
-
312
- @article{1511.02377,
313
- title = {The Value Functions of Markov Decision Processes},
314
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
315
- journal={arXiv preprint arXiv:1511.02377},
316
- year = {2015},
317
- url = {http://arxiv.org/abs/1511.02377v1}
318
- }
319
-
320
- @article{1512.09075,
321
- title = {A Notation for Markov Decision Processes},
322
- author = {Philip S. Thomas , Billy Okal},
323
- journal={arXiv preprint arXiv:1512.09075},
324
- year = {2015},
325
- url = {http://arxiv.org/abs/1512.09075v2}
326
- }
327
-
328
- @article{2008.10426,
329
- title = {Taming denumerable Markov decision processes with decisiveness},
330
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
331
- journal={arXiv preprint arXiv:2008.10426},
332
- year = {2020},
333
- url = {http://arxiv.org/abs/2008.10426v1}
334
- }
335
-
336
- @article{0711.2185,
337
- title = {Exact finite approximations of average-cost countable Markov Decision
338
- Processes},
339
- author = {Arie Leizarowitz , Adam Shwartz},
340
- journal={arXiv preprint arXiv:0711.2185},
341
- year = {2007},
342
- url = {http://arxiv.org/abs/0711.2185v1}
343
- }
344
-
345
- @article{2303.08631,
346
- title = {Smoothed Q-learning},
347
- author = {David Barber},
348
- journal={arXiv preprint arXiv:2303.08631},
349
- year = {2023},
350
- url = {http://arxiv.org/abs/2303.08631v1}
351
- }
352
-
353
- @article{2106.14642,
354
- title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values
355
- from Offline Expert Examples},
356
- author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad},
357
- journal={arXiv preprint arXiv:2106.14642},
358
- year = {2021},
359
- url = {http://arxiv.org/abs/2106.14642v3}
360
- }
361
-
362
- @article{2106.01134,
363
- title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity},
364
- author = {Wei Liao , Xiaohui Wei , Jizhou Lai},
365
- journal={arXiv preprint arXiv:2106.01134},
366
- year = {2021},
367
- url = {http://arxiv.org/abs/2106.01134v1}
368
- }
369
-
370
- @article{2012.01100,
371
- title = {Self-correcting Q-Learning},
372
- author = {Rong Zhu , Mattia Rigotti},
373
- journal={arXiv preprint arXiv:2012.01100},
374
- year = {2020},
375
- url = {http://arxiv.org/abs/2012.01100v2}
376
- }
377
-
378
- @article{1512.07669,
379
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
380
- Decision Processes},
381
- author = {Vikram Krishnamurthy},
382
- journal={arXiv preprint arXiv:1512.07669},
383
- year = {2015},
384
- url = {http://arxiv.org/abs/1512.07669v1}
385
- }
386
-
387
- @article{1511.02377,
388
- title = {The Value Functions of Markov Decision Processes},
389
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
390
- journal={arXiv preprint arXiv:1511.02377},
391
- year = {2015},
392
- url = {http://arxiv.org/abs/1511.02377v1}
393
- }
394
-
395
- @article{1512.09075,
396
- title = {A Notation for Markov Decision Processes},
397
- author = {Philip S. Thomas , Billy Okal},
398
- journal={arXiv preprint arXiv:1512.09075},
399
- year = {2015},
400
- url = {http://arxiv.org/abs/1512.09075v2}
401
- }
402
-
403
- @article{2008.10426,
404
- title = {Taming denumerable Markov decision processes with decisiveness},
405
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
406
- journal={arXiv preprint arXiv:2008.10426},
407
- year = {2020},
408
- url = {http://arxiv.org/abs/2008.10426v1}
409
- }
410
-
411
- @article{0711.2185,
412
- title = {Exact finite approximations of average-cost countable Markov Decision
413
- Processes},
414
- author = {Arie Leizarowitz , Adam Shwartz},
415
- journal={arXiv preprint arXiv:0711.2185},
416
- year = {2007},
417
- url = {http://arxiv.org/abs/0711.2185v1}
418
- }
419
-
420
- @article{2303.08631,
421
- title = {Smoothed Q-learning},
422
- author = {David Barber},
423
- journal={arXiv preprint arXiv:2303.08631},
424
- year = {2023},
425
- url = {http://arxiv.org/abs/2303.08631v1}
426
- }
427
-
428
- @article{2106.14642,
429
- title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values
430
- from Offline Expert Examples},
431
- author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad},
432
- journal={arXiv preprint arXiv:2106.14642},
433
- year = {2021},
434
- url = {http://arxiv.org/abs/2106.14642v3}
435
- }
436
-
437
- @article{2106.01134,
438
- title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity},
439
- author = {Wei Liao , Xiaohui Wei , Jizhou Lai},
440
- journal={arXiv preprint arXiv:2106.01134},
441
- year = {2021},
442
- url = {http://arxiv.org/abs/2106.01134v1}
443
- }
444
-
445
- @article{2012.01100,
446
- title = {Self-correcting Q-Learning},
447
- author = {Rong Zhu , Mattia Rigotti},
448
- journal={arXiv preprint arXiv:2012.01100},
449
- year = {2020},
450
- url = {http://arxiv.org/abs/2012.01100v2}
451
- }
452
-
453
- @article{1703.02102,
454
- title = {Revisiting stochastic off-policy action-value gradients},
455
- author = {Yemi Okesanjo , Victor Kofia},
456
- journal={arXiv preprint arXiv:1703.02102},
457
- year = {2017},
458
- url = {http://arxiv.org/abs/1703.02102v2}
459
- }
460
-
461
- @article{1512.07669,
462
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
463
- Decision Processes},
464
- author = {Vikram Krishnamurthy},
465
- journal={arXiv preprint arXiv:1512.07669},
466
- year = {2015},
467
- url = {http://arxiv.org/abs/1512.07669v1}
468
- }
469
-
470
- @article{1511.02377,
471
- title = {The Value Functions of Markov Decision Processes},
472
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
473
- journal={arXiv preprint arXiv:1511.02377},
474
- year = {2015},
475
- url = {http://arxiv.org/abs/1511.02377v1}
476
- }
477
-
478
- @article{1512.09075,
479
- title = {A Notation for Markov Decision Processes},
480
- author = {Philip S. Thomas , Billy Okal},
481
- journal={arXiv preprint arXiv:1512.09075},
482
- year = {2015},
483
- url = {http://arxiv.org/abs/1512.09075v2}
484
- }
485
-
486
- @article{2008.10426,
487
- title = {Taming denumerable Markov decision processes with decisiveness},
488
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
489
- journal={arXiv preprint arXiv:2008.10426},
490
- year = {2020},
491
- url = {http://arxiv.org/abs/2008.10426v1}
492
- }
493
-
494
- @article{0711.2185,
495
- title = {Exact finite approximations of average-cost countable Markov Decision
496
- Processes},
497
- author = {Arie Leizarowitz , Adam Shwartz},
498
- journal={arXiv preprint arXiv:0711.2185},
499
- year = {2007},
500
- url = {http://arxiv.org/abs/0711.2185v1}
501
- }
502
-
503
- @article{2303.08631,
504
- title = {Smoothed Q-learning},
505
- author = {David Barber},
506
- journal={arXiv preprint arXiv:2303.08631},
507
- year = {2023},
508
- url = {http://arxiv.org/abs/2303.08631v1}
509
- }
510
-
511
- @article{2106.14642,
512
- title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values
513
- from Offline Expert Examples},
514
- author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad},
515
- journal={arXiv preprint arXiv:2106.14642},
516
- year = {2021},
517
- url = {http://arxiv.org/abs/2106.14642v3}
518
- }
519
-
520
- @article{2106.01134,
521
- title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity},
522
- author = {Wei Liao , Xiaohui Wei , Jizhou Lai},
523
- journal={arXiv preprint arXiv:2106.01134},
524
- year = {2021},
525
- url = {http://arxiv.org/abs/2106.01134v1}
526
- }
527
-
528
- @article{2012.01100,
529
- title = {Self-correcting Q-Learning},
530
- author = {Rong Zhu , Mattia Rigotti},
531
- journal={arXiv preprint arXiv:2012.01100},
532
- year = {2020},
533
- url = {http://arxiv.org/abs/2012.01100v2}
534
- }
535
-
536
- @article{1703.02102,
537
- title = {Revisiting stochastic off-policy action-value gradients},
538
- author = {Yemi Okesanjo , Victor Kofia},
539
- journal={arXiv preprint arXiv:1703.02102},
540
- year = {2017},
541
- url = {http://arxiv.org/abs/1703.02102v2}
542
- }
543
-
544
- @article{2209.01820,
545
- title = {Natural Policy Gradients In Reinforcement Learning Explained},
546
- author = {W. J. A. van Heeswijk},
547
- journal={arXiv preprint arXiv:2209.01820},
548
- year = {2022},
549
- url = {http://arxiv.org/abs/2209.01820v1}
550
- }
551
-
552
- @article{1512.07669,
553
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
554
- Decision Processes},
555
- author = {Vikram Krishnamurthy},
556
- journal={arXiv preprint arXiv:1512.07669},
557
- year = {2015},
558
- url = {http://arxiv.org/abs/1512.07669v1}
559
- }
560
-
561
- @article{1511.02377,
562
- title = {The Value Functions of Markov Decision Processes},
563
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
564
- journal={arXiv preprint arXiv:1511.02377},
565
- year = {2015},
566
- url = {http://arxiv.org/abs/1511.02377v1}
567
- }
568
-
569
- @article{1512.09075,
570
- title = {A Notation for Markov Decision Processes},
571
- author = {Philip S. Thomas , Billy Okal},
572
- journal={arXiv preprint arXiv:1512.09075},
573
- year = {2015},
574
- url = {http://arxiv.org/abs/1512.09075v2}
575
- }
576
-
577
- @article{2008.10426,
578
- title = {Taming denumerable Markov decision processes with decisiveness},
579
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
580
- journal={arXiv preprint arXiv:2008.10426},
581
- year = {2020},
582
- url = {http://arxiv.org/abs/2008.10426v1}
583
- }
584
-
585
- @article{0711.2185,
586
- title = {Exact finite approximations of average-cost countable Markov Decision
587
- Processes},
588
- author = {Arie Leizarowitz , Adam Shwartz},
589
- journal={arXiv preprint arXiv:0711.2185},
590
- year = {2007},
591
- url = {http://arxiv.org/abs/0711.2185v1}
592
- }
593
-
594
- @article{2303.08631,
595
- title = {Smoothed Q-learning},
596
- author = {David Barber},
597
- journal={arXiv preprint arXiv:2303.08631},
598
- year = {2023},
599
- url = {http://arxiv.org/abs/2303.08631v1}
600
- }
601
-
602
- @article{2106.14642,
603
- title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values
604
- from Offline Expert Examples},
605
- author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad},
606
- journal={arXiv preprint arXiv:2106.14642},
607
- year = {2021},
608
- url = {http://arxiv.org/abs/2106.14642v3}
609
- }
610
-
611
- @article{2106.01134,
612
- title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity},
613
- author = {Wei Liao , Xiaohui Wei , Jizhou Lai},
614
- journal={arXiv preprint arXiv:2106.01134},
615
- year = {2021},
616
- url = {http://arxiv.org/abs/2106.01134v1}
617
- }
618
-
619
- @article{2012.01100,
620
- title = {Self-correcting Q-Learning},
621
- author = {Rong Zhu , Mattia Rigotti},
622
- journal={arXiv preprint arXiv:2012.01100},
623
- year = {2020},
624
- url = {http://arxiv.org/abs/2012.01100v2}
625
- }
626
-
627
- @article{1703.02102,
628
- title = {Revisiting stochastic off-policy action-value gradients},
629
- author = {Yemi Okesanjo , Victor Kofia},
630
- journal={arXiv preprint arXiv:1703.02102},
631
- year = {2017},
632
- url = {http://arxiv.org/abs/1703.02102v2}
633
- }
634
-
635
- @article{2209.01820,
636
- title = {Natural Policy Gradients In Reinforcement Learning Explained},
637
- author = {W. J. A. van Heeswijk},
638
- journal={arXiv preprint arXiv:2209.01820},
639
- year = {2022},
640
- url = {http://arxiv.org/abs/2209.01820v1}
641
- }
642
-
643
- @article{1811.09013,
644
- title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
645
- author = {Ehsan Imani , Eric Graves , Martha White},
646
- journal={arXiv preprint arXiv:1811.09013},
647
- year = {2018},
648
- url = {http://arxiv.org/abs/1811.09013v2}
649
- }
650
-
651
- @article{1512.07669,
652
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
653
- Decision Processes},
654
- author = {Vikram Krishnamurthy},
655
- journal={arXiv preprint arXiv:1512.07669},
656
- year = {2015},
657
- url = {http://arxiv.org/abs/1512.07669v1}
658
- }
659
-
660
- @article{1511.02377,
661
- title = {The Value Functions of Markov Decision Processes},
662
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
663
- journal={arXiv preprint arXiv:1511.02377},
664
- year = {2015},
665
- url = {http://arxiv.org/abs/1511.02377v1}
666
- }
667
-
668
- @article{1512.09075,
669
- title = {A Notation for Markov Decision Processes},
670
- author = {Philip S. Thomas , Billy Okal},
671
- journal={arXiv preprint arXiv:1512.09075},
672
- year = {2015},
673
- url = {http://arxiv.org/abs/1512.09075v2}
674
- }
675
-
676
- @article{2008.10426,
677
- title = {Taming denumerable Markov decision processes with decisiveness},
678
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
679
- journal={arXiv preprint arXiv:2008.10426},
680
- year = {2020},
681
- url = {http://arxiv.org/abs/2008.10426v1}
682
- }
683
-
684
- @article{0711.2185,
685
- title = {Exact finite approximations of average-cost countable Markov Decision
686
- Processes},
687
- author = {Arie Leizarowitz , Adam Shwartz},
688
- journal={arXiv preprint arXiv:0711.2185},
689
- year = {2007},
690
- url = {http://arxiv.org/abs/0711.2185v1}
691
- }
692
-
693
- @article{2303.08631,
694
- title = {Smoothed Q-learning},
695
- author = {David Barber},
696
- journal={arXiv preprint arXiv:2303.08631},
697
- year = {2023},
698
- url = {http://arxiv.org/abs/2303.08631v1}
699
- }
700
-
701
- @article{2106.14642,
702
- title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values
703
- from Offline Expert Examples},
704
- author = {Li Meng , Anis Yazidi , Morten Goodwin , Paal Engelstad},
705
- journal={arXiv preprint arXiv:2106.14642},
706
- year = {2021},
707
- url = {http://arxiv.org/abs/2106.14642v3}
708
- }
709
-
710
- @article{2106.01134,
711
- title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity},
712
- author = {Wei Liao , Xiaohui Wei , Jizhou Lai},
713
- journal={arXiv preprint arXiv:2106.01134},
714
- year = {2021},
715
- url = {http://arxiv.org/abs/2106.01134v1}
716
- }
717
-
718
- @article{2012.01100,
719
- title = {Self-correcting Q-Learning},
720
- author = {Rong Zhu , Mattia Rigotti},
721
- journal={arXiv preprint arXiv:2012.01100},
722
- year = {2020},
723
- url = {http://arxiv.org/abs/2012.01100v2}
724
- }
725
-
726
- @article{1703.02102,
727
- title = {Revisiting stochastic off-policy action-value gradients},
728
- author = {Yemi Okesanjo , Victor Kofia},
729
- journal={arXiv preprint arXiv:1703.02102},
730
- year = {2017},
731
- url = {http://arxiv.org/abs/1703.02102v2}
732
- }
733
-
734
- @article{2209.01820,
735
- title = {Natural Policy Gradients In Reinforcement Learning Explained},
736
- author = {W. J. A. van Heeswijk},
737
- journal={arXiv preprint arXiv:2209.01820},
738
- year = {2022},
739
- url = {http://arxiv.org/abs/2209.01820v1}
740
- }
741
-
742
- @article{1811.09013,
743
- title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
744
- author = {Ehsan Imani , Eric Graves , Martha White},
745
- journal={arXiv preprint arXiv:1811.09013},
746
- year = {2018},
747
- url = {http://arxiv.org/abs/1811.09013v2}
748
- }
749
-
750
- @article{1911.04817,
751
- title = {On Policy Gradients},
752
- author = {Mattis Manfred Kämmerer},
753
- journal={arXiv preprint arXiv:1911.04817},
754
- year = {2019},
755
- url = {http://arxiv.org/abs/1911.04817v1}
756
- }
757
-
758
- @article{1512.07669,
759
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov
760
- Decision Processes},
761
- author = {Vikram Krishnamurthy},
762
- journal={arXiv preprint arXiv:1512.07669},
763
- year = {2015},
764
- url = {http://arxiv.org/abs/1512.07669v1}
765
- }
766
-
767
- @article{1511.02377,
768
- title = {The Value Functions of Markov Decision Processes},
769
- author = {Ehud Lehrer , Eilon Solan , Omri N. Solan},
770
- journal={arXiv preprint arXiv:1511.02377},
771
- year = {2015},
772
- url = {http://arxiv.org/abs/1511.02377v1}
773
- }
774
-
775
- @article{1512.09075,
776
- title = {A Notation for Markov Decision Processes},
777
- author = {Philip S. Thomas , Billy Okal},
778
- journal={arXiv preprint arXiv:1512.09075},
779
- year = {2015},
780
- url = {http://arxiv.org/abs/1512.09075v2}
781
- }
782
-
783
- @article{2008.10426,
784
- title = {Taming denumerable Markov decision processes with decisiveness},
785
- author = {Nathalie Bertrand , Patricia Bouyer , Thomas Brihaye , Paulin Fournier},
786
- journal={arXiv preprint arXiv:2008.10426},
787
- year = {2020},
788
- url = {http://arxiv.org/abs/2008.10426v1}
789
- }
790
-
791
- @article{0711.2185,
792
- title = {Exact finite approximations of average-cost countable Markov Decision
793
- Processes},
794
- author = {Arie Leizarowitz , Adam Shwartz},
795
- journal={arXiv preprint arXiv:0711.2185},
796
- year = {2007},
797
- url = {http://arxiv.org/abs/0711.2185v1}
798
- }
799
-
800
- @article{2303.08631,
801
- title = {Smoothed Q-learning},
802
- author = {David Barber},
803
- journal = {arXiv preprint arXiv:2303.08631},
- year = {2023},
- url = {http://arxiv.org/abs/2303.08631v1}
- }
-
- @article{2106.14642,
- title = {Expert Q-learning: Deep Reinforcement Learning with Coarse State Values from Offline Expert Examples},
- author = {Li Meng and Anis Yazidi and Morten Goodwin and Paal Engelstad},
- journal = {arXiv preprint arXiv:2106.14642},
- year = {2021},
- url = {http://arxiv.org/abs/2106.14642v3}
- }
-
- @article{2106.01134,
- title = {Smooth Q-learning: Accelerate Convergence of Q-learning Using Similarity},
- author = {Wei Liao and Xiaohui Wei and Jizhou Lai},
- journal = {arXiv preprint arXiv:2106.01134},
- year = {2021},
- url = {http://arxiv.org/abs/2106.01134v1}
- }
-
- @article{2012.01100,
- title = {Self-correcting Q-Learning},
- author = {Rong Zhu and Mattia Rigotti},
- journal = {arXiv preprint arXiv:2012.01100},
- year = {2020},
- url = {http://arxiv.org/abs/2012.01100v2}
- }
-
- @article{1703.02102,
- title = {Revisiting stochastic off-policy action-value gradients},
- author = {Yemi Okesanjo and Victor Kofia},
- journal = {arXiv preprint arXiv:1703.02102},
- year = {2017},
- url = {http://arxiv.org/abs/1703.02102v2}
- }
-
- @article{2209.01820,
- title = {Natural Policy Gradients In Reinforcement Learning Explained},
- author = {W. J. A. van Heeswijk},
- journal = {arXiv preprint arXiv:2209.01820},
- year = {2022},
- url = {http://arxiv.org/abs/2209.01820v1}
- }
-
- @article{1811.09013,
- title = {An Off-policy Policy Gradient Theorem Using Emphatic Weightings},
- author = {Ehsan Imani and Eric Graves and Martha White},
- journal = {arXiv preprint arXiv:1811.09013},
- year = {2018},
- url = {http://arxiv.org/abs/1811.09013v2}
- }
-
- @article{1911.04817,
- title = {On Policy Gradients},
- author = {Mattis Manfred Kämmerer},
- journal = {arXiv preprint arXiv:1911.04817},
- year = {2019},
- url = {http://arxiv.org/abs/1911.04817v1}
- }
-
- @article{2108.11510,
- title = {Deep Reinforcement Learning in Computer Vision: A Comprehensive Survey},
- author = {Ngan Le and Vidhiwar Singh Rathour and Kashu Yamazaki and Khoa Luu and Marios Savvides},
- journal = {arXiv preprint arXiv:2108.11510},
- year = {2021},
- url = {http://arxiv.org/abs/2108.11510v1}
- }
-
- @article{1512.07669,
- title = {Reinforcement Learning: Stochastic Approximation Algorithms for Markov Decision Processes},
- author = {Vikram Krishnamurthy},
- journal = {arXiv preprint arXiv:1512.07669},
- year = {2015},
- url = {http://arxiv.org/abs/1512.07669v1}
- }
-
- @article{1511.02377,
- title = {The Value Functions of Markov Decision Processes},
- author = {Ehud Lehrer and Eilon Solan and Omri N. Solan},
- journal = {arXiv preprint arXiv:1511.02377},
- year = {2015},
- url = {http://arxiv.org/abs/1511.02377v1}
- }
-
- @article{1512.09075,
- title = {A Notation for Markov Decision Processes},
- author = {Philip S. Thomas and Billy Okal},
- journal = {arXiv preprint arXiv:1512.09075},
- year = {2015},
- url = {http://arxiv.org/abs/1512.09075v2}
- }
-
- @article{2008.10426,
- title = {Taming denumerable Markov decision processes with decisiveness},
- author = {Nathalie Bertrand and Patricia Bouyer and Thomas Brihaye and Paulin Fournier},
- journal = {arXiv preprint arXiv:2008.10426},
- year = {2020},
- url = {http://arxiv.org/abs/2008.10426v1}
- }
-
- @article{0711.2185,
- title = {Exact finite approximations of average-cost countable Markov Decision Processes},
- author = {Arie Leizarowitz and Adam Shwartz},
- journal = {arXiv preprint arXiv:0711.2185},
- year = {2007},
- url = {http://arxiv.org/abs/0711.2185v1}
- }
-
- @article{2212.00253,
- title = {Distributed Deep Reinforcement Learning: A Survey and A Multi-Player Multi-Agent Learning Toolbox},
- author = {Qiyue Yin and Tongtong Yu and Shengqi Shen and Jun Yang and Meijing Zhao and Kaiqi Huang and Bin Liang and Liang Wang},
- journal = {arXiv preprint arXiv:2212.00253},
- year = {2022},
- url = {http://arxiv.org/abs/2212.00253v1}
- }
-
- @article{1709.05067,
- title = {Deep Reinforcement Learning for Conversational AI},
- author = {Mahipal Jadeja and Neelanshi Varia and Agam Shah},
- journal = {arXiv preprint arXiv:1709.05067},
- year = {2017},
- url = {http://arxiv.org/abs/1709.05067v1}
- }
-
- @article{1708.05866,
- title = {A Brief Survey of Deep Reinforcement Learning},
- author = {Kai Arulkumaran and Marc Peter Deisenroth and Miles Brundage and Anil Anthony Bharath},
- journal = {arXiv preprint arXiv:1708.05866},
- year = {2017},
- url = {http://arxiv.org/abs/1708.05866v2}
- }
-
- @article{1906.10025,
- title = {Modern Deep Reinforcement Learning Algorithms},
- author = {Sergey Ivanov and Alexander D'yakonov},
- journal = {arXiv preprint arXiv:1906.10025},
- year = {2019},
- url = {http://arxiv.org/abs/1906.10025v2}
- }
-
- @article{2111.01334,
- title = {Measuring and utilizing temporal network dissimilarity},
- author = {Xiu-Xiu Zhan and Chuang Liu and Zhipeng Wang and Huijuang Wang and Petter Holme and Zi-Ke Zhang},
- journal = {arXiv preprint arXiv:2111.01334},
- year = {2021},
- url = {http://arxiv.org/abs/2111.01334v1}
- }
-
- @article{2110.06553,
- title = {Spatial-temporal Transformers for EEG Emotion Recognition},
- author = {Jiyao Liu and Hao Wu and Li Zhang and Yanxi Zhao},
- journal = {arXiv preprint arXiv:2110.06553},
- year = {2021},
- url = {http://arxiv.org/abs/2110.06553v2}
- }
-
- @article{2007.04828,
- title = {Predictability of real temporal networks},
- author = {Disheng Tang and Wenbo Du and Louis Shekhtman and Yijie Wang and Shlomo Havlin and Xianbin Cao and Gang Yan},
- journal = {arXiv preprint arXiv:2007.04828},
- year = {2020},
- url = {http://arxiv.org/abs/2007.04828v1}
- }
outputs/outputs_20230421_000752/related works.tex DELETED
@@ -1,20 +0,0 @@
- \section{related works}
- \paragraph{Markov Decision Processes:}
- The study of reinforcement learning is fundamentally rooted in the understanding of Markov decision processes (MDPs). A concise description of stochastic approximation algorithms in reinforcement learning of MDPs is provided by \cite{1512.07669}. The work done in \cite{1511.02377} offers a full characterization of the set of value functions of MDPs, while \cite{1512.09075} specifies a notation for MDPs. The concept of decisiveness in denumerable Markov chains has been extended to MDPs in \cite{2008.10426}, exploring the implications of resolving non-determinism in adversarial or cooperative ways. Additionally, \cite{0711.2185} introduces an embedding technique to produce a finite-state MDP from a countable-state MDP, which can be used as an approximation for computational purposes.
-
- \paragraph{Q-Learning and Variants:}
- Q-learning is a widely used reinforcement learning algorithm that converges to the optimal solution \cite{2303.08631}. However, it is known to overestimate values and spend too much time exploring unhelpful states. Double Q-learning, a convergent alternative, mitigates some of these overestimation issues but may lead to slower convergence \cite{2303.08631}. To address the maximization bias in Q-learning, \cite{2012.01100} introduces a self-correcting algorithm that balances the overestimation of conventional Q-learning and the underestimation of Double Q-learning. This self-correcting Q-learning algorithm is shown to be more accurate and achieves faster convergence in certain domains.
-
- \paragraph{Expert Q-Learning:}
- Expert Q-learning is a novel deep reinforcement learning algorithm proposed in \cite{2106.14642}. Inspired by Dueling Q-learning, it incorporates semi-supervised learning into reinforcement learning by splitting Q-values into state values and action advantages. An expert network is designed in addition to the Q-network, which updates each time following the regular offline minibatch update. The algorithm is demonstrated to be more resistant to overestimation bias and achieves more robust performance compared to the baseline Q-learning algorithm.
-
- \paragraph{Policy Gradient Methods:}
- Policy gradient methods are widely used for control in reinforcement learning, particularly in continuous action settings. Natural gradients have been extensively studied within the context of natural gradient actor-critic algorithms and deterministic policy gradients \cite{2209.01820}. The work in \cite{1811.09013} presents the first off-policy policy gradient theorem using emphatic weightings and develops a new actor-critic algorithm called Actor Critic with Emphatic weightings (ACE) that approximates the simplified gradients provided by the theorem. This algorithm is shown to outperform previous off-policy policy gradient methods, such as OffPAC and DPG, in finding the optimal solution.
-
- \paragraph{Deep Reinforcement Learning:}
- Deep reinforcement learning (DRL) combines the power of deep learning with reinforcement learning, achieving remarkable success in various domains, such as finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. The field has seen significant advancements in recent years, with central algorithms such as the deep Q-network, trust region policy optimization, and asynchronous advantage actor-critic being developed \cite{1708.05866}. A detailed review of DRL algorithms and their theoretical justifications, practical limitations, and empirical properties can be found in \cite{1906.10025}.
-
- \paragraph{Temporal Networks:}
- Temporal networks, where links change over time, are essential in understanding the ordering and causality of interactions between nodes in various applications. The work in \cite{2111.01334} proposes a temporal dissimilarity measure for temporal network comparison based on the fastest arrival distance distribution and spectral entropy-based Jensen-Shannon divergence. This measure is shown to effectively discriminate diverse temporal networks with different structures and functional distinctions.
-
- In conclusion, reinforcement learning has seen significant advancements in recent years, with various algorithms and techniques being developed to address the challenges in the field. From understanding the fundamentals of MDPs to developing advanced DRL algorithms, researchers continue to push the boundaries of what is possible in reinforcement learning and its applications.
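
The Q-learning paragraph in the deleted section above turns on the difference between the standard maximisation target and Double Q-learning's decoupled target. A minimal sketch of that difference, not part of this commit and using hypothetical function names and a two-table setup assumed only for illustration:

import numpy as np

def q_learning_target(Q, s_next, r, gamma=0.99):
    # Standard target: the same table both selects and evaluates the greedy
    # action, which is the source of the overestimation bias discussed above.
    return r + gamma * np.max(Q[s_next])

def double_q_learning_target(Q_a, Q_b, s_next, r, gamma=0.99):
    # Double Q-learning target: Q_a selects the greedy action and Q_b evaluates
    # it, reducing overestimation at the cost of slower convergence.
    best_action = np.argmax(Q_a[s_next])
    return r + gamma * Q_b[s_next, best_action]
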
outputs/outputs_20230421_012207/abstract.tex DELETED
File without changes
outputs/outputs_20230421_012207/backgrounds.tex DELETED
@@ -1,16 +0,0 @@
- \section{backgrounds}
- Reinforcement Learning (RL) is a learning paradigm for solving sequential decision-making problems, where an agent learns to make decisions by interacting with an environment and receiving feedback in the form of rewards or penalties \cite{2001.09608}. The central problem in RL is to find an optimal policy, which is a mapping from states to actions, that maximizes the expected cumulative reward over time.
-
- One of the foundational theories in RL is the concept of Markov Decision Processes (MDPs), which provide a mathematical framework for modeling decision-making problems. An MDP is defined as a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{2108.11510}. The objective in an MDP is to find a policy $\pi$ that maximizes the expected return $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $t$ is the current time step and $\gamma \in [0, 1]$ is the discount factor that determines the importance of future rewards.
-
- Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected return when taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by:
- \begin{equation}
- Q(s, a) \leftarrow Q(s, a) + \alpha [R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a)],
- \end{equation}
- where $\alpha$ is the learning rate, $s'$ is the next state, and $a'$ is an action in state $s'$ \cite{2106.01134}.
-
- Deep Reinforcement Learning (DRL) is an extension of RL that employs deep neural networks as function approximators for the value function or policy \cite{2108.11510}. DRL has demonstrated remarkable success in various domains, including finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. However, DRL is known to suffer from data inefficiency due to its trial-and-error learning mechanism, and several methods have been proposed to improve sample efficiency, such as environment modeling, experience transfer, and distributed modifications \cite{2202.05135}.
-
- Policy gradient methods are another class of RL algorithms that directly optimize the policy by following the gradient of the expected return with respect to the policy parameters \cite{1911.09048}. The policy gradient theorem provides a simplified form for the gradient, which has been widely used in on-policy learning algorithms \cite{1703.02102}. Off-policy learning, where the behavior policy is not necessarily attempting to learn and follow the optimal policy for the given task, has been a challenging area of research, and recent work has proposed the first off-policy policy gradient theorem using emphatic weightings \cite{1811.09013}.
-
- In summary, Reinforcement Learning aims to solve sequential decision-making problems by finding an optimal policy that maximizes the expected cumulative reward over time. Foundational theories and algorithms such as MDPs, Q-learning, DRL, and policy gradient methods provide the basis for RL research and applications in various domains \cite{2001.09608, 2108.11510}.
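
The deleted backgrounds.tex above states the tabular Q-learning update rule in equation form. A minimal runnable sketch of that same update, not part of this commit, with the table shape and hyperparameter values assumed only for illustration:

import numpy as np

def q_learning_update(Q, s, a, r, s_next, alpha=0.1, gamma=0.99):
    # One temporal-difference update on a Q-table indexed as Q[state, action]:
    # Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))
    td_target = r + gamma * np.max(Q[s_next])
    Q[s, a] += alpha * (td_target - Q[s, a])
    return Q

# Toy example: 5 states, 2 actions, a single transition (s=0, a=1, r=1.0, s'=2).
Q = np.zeros((5, 2))
Q = q_learning_update(Q, s=0, a=1, r=1.0, s_next=2)

In the deep RL setting described in the same file, a neural network replaces the table as the function approximator, but the target term keeps this form.
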
outputs/outputs_20230421_012207/conclusion.tex DELETED
File without changes
outputs/outputs_20230421_012207/experiments.tex DELETED
File without changes
outputs/outputs_20230421_012207/fancyhdr.sty DELETED
@@ -1,485 +0,0 @@
- % fancyhdr.sty version 3.2
- % Fancy headers and footers for LaTeX.
- % Piet van Oostrum,
- % Dept of Computer and Information Sciences, University of Utrecht,
- % Padualaan 14, P.O. Box 80.089, 3508 TB Utrecht, The Netherlands
- % Telephone: +31 30 2532180. Email: piet@cs.uu.nl
- % ========================================================================
- % LICENCE:
- % This file may be distributed under the terms of the LaTeX Project Public
- % License, as described in lppl.txt in the base LaTeX distribution.
- % Either version 1 or, at your option, any later version.
- % ========================================================================
- % MODIFICATION HISTORY:
- % Sep 16, 1994
- % version 1.4: Correction for use with \reversemargin
- % Sep 29, 1994:
- % version 1.5: Added the \iftopfloat, \ifbotfloat and \iffloatpage commands
- % Oct 4, 1994:
- % version 1.6: Reset single spacing in headers/footers for use with
- % setspace.sty or doublespace.sty
- % Oct 4, 1994:
- % version 1.7: changed \let\@mkboth\markboth to
- % \def\@mkboth{\protect\markboth} to make it more robust
- % Dec 5, 1994:
- % version 1.8: corrections for amsbook/amsart: define \@chapapp and (more
- % importantly) use the \chapter/sectionmark definitions from ps@headings if
- % they exist (which should be true for all standard classes).
- % May 31, 1995:
- % version 1.9: The proposed \renewcommand{\headrulewidth}{\iffloatpage...
- % construction in the doc did not work properly with the fancyplain style.
- % June 1, 1995:
- % version 1.91: The definition of \@mkboth wasn't restored on subsequent
- % \pagestyle{fancy}'s.
- % June 1, 1995:
- % version 1.92: The sequence \pagestyle{fancyplain} \pagestyle{plain}
- % \pagestyle{fancy} would erroneously select the plain version.
- % June 1, 1995:
- % version 1.93: \fancypagestyle command added.
- % Dec 11, 1995:
- % version 1.94: suggested by Conrad Hughes <chughes@maths.tcd.ie>
- % CJCH, Dec 11, 1995: added \footruleskip to allow control over footrule
- % position (old hardcoded value of .3\normalbaselineskip is far too high
- % when used with very small footer fonts).
- % Jan 31, 1996:
- % version 1.95: call \@normalsize in the reset code if that is defined,
- % otherwise \normalsize.
- % this is to solve a problem with ucthesis.cls, as this doesn't
- % define \@currsize. Unfortunately for latex209 calling \normalsize doesn't
- % work as this is optimized to do very little, so there \@normalsize should
- % be called. Hopefully this code works for all versions of LaTeX known to
- % mankind.
- % April 25, 1996:
- % version 1.96: initialize \headwidth to a magic (negative) value to catch
- % most common cases that people change it before calling \pagestyle{fancy}.
- % Note it can't be initialized when reading in this file, because
- % \textwidth could be changed afterwards. This is quite probable.
- % We also switch to \MakeUppercase rather than \uppercase and introduce a
- % \nouppercase command for use in headers. and footers.
- % May 3, 1996:
- % version 1.97: Two changes:
- % 1. Undo the change in version 1.8 (using the pagestyle{headings} defaults
- % for the chapter and section marks. The current version of amsbook and
- % amsart classes don't seem to need them anymore. Moreover the standard
- % latex classes don't use \markboth if twoside isn't selected, and this is
- % confusing as \leftmark doesn't work as expected.
- % 2. include a call to \ps@empty in ps@@fancy. This is to solve a problem
- % in the amsbook and amsart classes, that make global changes to \topskip,
- % which are reset in \ps@empty. Hopefully this doesn't break other things.
- % May 7, 1996:
- % version 1.98:
- % Added % after the line \def\nouppercase
- % May 7, 1996:
- % version 1.99: This is the alpha version of fancyhdr 2.0
- % Introduced the new commands \fancyhead, \fancyfoot, and \fancyhf.
- % Changed \headrulewidth, \footrulewidth, \footruleskip to
- % macros rather than length parameters, In this way they can be
- % conditionalized and they don't consume length registers. There is no need
- % to have them as length registers unless you want to do calculations with
- % them, which is unlikely. Note that this may make some uses of them
- % incompatible (i.e. if you have a file that uses \setlength or \xxxx=)
- % May 10, 1996:
- % version 1.99a:
- % Added a few more % signs
- % May 10, 1996:
- % version 1.99b:
- % Changed the syntax of \f@nfor to be resistent to catcode changes of :=
- % Removed the [1] from the defs of \lhead etc. because the parameter is
- % consumed by the \@[xy]lhead etc. macros.
- % June 24, 1997:
- % version 1.99c:
- % corrected \nouppercase to also include the protected form of \MakeUppercase
- % \global added to manipulation of \headwidth.
- % \iffootnote command added.
- % Some comments added about \@fancyhead and \@fancyfoot.
- % Aug 24, 1998
- % version 1.99d
- % Changed the default \ps@empty to \ps@@empty in order to allow
- % \fancypagestyle{empty} redefinition.
- % Oct 11, 2000
- % version 2.0
- % Added LPPL license clause.
- %
- % A check for \headheight is added. An errormessage is given (once) if the
- % header is too large. Empty headers don't generate the error even if
- % \headheight is very small or even 0pt.
- % Warning added for the use of 'E' option when twoside option is not used.
- % In this case the 'E' fields will never be used.
- %
- % Mar 10, 2002
- % version 2.1beta
- % New command: \fancyhfoffset[place]{length}
- % defines offsets to be applied to the header/footer to let it stick into
- % the margins (if length > 0).
- % place is like in fancyhead, except that only E,O,L,R can be used.
- % This replaces the old calculation based on \headwidth and the marginpar
- % area.
- % \headwidth will be dynamically calculated in the headers/footers when
- % this is used.
- %
- % Mar 26, 2002
- % version 2.1beta2
- % \fancyhfoffset now also takes h,f as possible letters in the argument to
- % allow the header and footer widths to be different.
- % New commands \fancyheadoffset and \fancyfootoffset added comparable to
- % \fancyhead and \fancyfoot.
- % Errormessages and warnings have been made more informative.
- %
- % Dec 9, 2002
- % version 2.1
- % The defaults for \footrulewidth, \plainheadrulewidth and
- % \plainfootrulewidth are changed from \z@skip to 0pt. In this way when
- % someone inadvertantly uses \setlength to change any of these, the value
- % of \z@skip will not be changed, rather an errormessage will be given.
-
- % March 3, 2004
- % Release of version 3.0
-
- % Oct 7, 2004
- % version 3.1
- % Added '\endlinechar=13' to \fancy@reset to prevent problems with
- % includegraphics in header when verbatiminput is active.
-
- % March 22, 2005
- % version 3.2
- % reset \everypar (the real one) in \fancy@reset because spanish.ldf does
- % strange things with \everypar between << and >>.
-
- \def\ifancy@mpty#1{\def\temp@a{#1}\ifx\temp@a\@empty}
-
- \def\fancy@def#1#2{\ifancy@mpty{#2}\fancy@gbl\def#1{\leavevmode}\else
- \fancy@gbl\def#1{#2\strut}\fi}
-
- \let\fancy@gbl\global
-
- \def\@fancyerrmsg#1{%
- \ifx\PackageError\undefined
- \errmessage{#1}\else
- \PackageError{Fancyhdr}{#1}{}\fi}
- \def\@fancywarning#1{%
- \ifx\PackageWarning\undefined
- \errmessage{#1}\else
- \PackageWarning{Fancyhdr}{#1}{}\fi}
-
- % Usage: \@forc \var{charstring}{command to be executed for each char}
- % This is similar to LaTeX's \@tfor, but expands the charstring.
-
- \def\@forc#1#2#3{\expandafter\f@rc\expandafter#1\expandafter{#2}{#3}}
- \def\f@rc#1#2#3{\def\temp@ty{#2}\ifx\@empty\temp@ty\else
- \f@@rc#1#2\f@@rc{#3}\fi}
- \def\f@@rc#1#2#3\f@@rc#4{\def#1{#2}#4\f@rc#1{#3}{#4}}
-
- % Usage: \f@nfor\name:=list\do{body}
- % Like LaTeX's \@for but an empty list is treated as a list with an empty
- % element
-
- \newcommand{\f@nfor}[3]{\edef\@fortmp{#2}%
- \expandafter\@forloop#2,\@nil,\@nil\@@#1{#3}}
-
- % Usage: \def@ult \cs{defaults}{argument}
- % sets \cs to the characters from defaults appearing in argument
- % or defaults if it would be empty. All characters are lowercased.
-
- \newcommand\def@ult[3]{%
- \edef\temp@a{\lowercase{\edef\noexpand\temp@a{#3}}}\temp@a
- \def#1{}%
- \@forc\tmpf@ra{#2}%
- {\expandafter\if@in\tmpf@ra\temp@a{\edef#1{#1\tmpf@ra}}{}}%
- \ifx\@empty#1\def#1{#2}\fi}
- %
- % \if@in <char><set><truecase><falsecase>
- %
- \newcommand{\if@in}[4]{%
- \edef\temp@a{#2}\def\temp@b##1#1##2\temp@b{\def\temp@b{##1}}%
- \expandafter\temp@b#2#1\temp@b\ifx\temp@a\temp@b #4\else #3\fi}
-
- \newcommand{\fancyhead}{\@ifnextchar[{\f@ncyhf\fancyhead h}%
- {\f@ncyhf\fancyhead h[]}}
- \newcommand{\fancyfoot}{\@ifnextchar[{\f@ncyhf\fancyfoot f}%
- {\f@ncyhf\fancyfoot f[]}}
- \newcommand{\fancyhf}{\@ifnextchar[{\f@ncyhf\fancyhf{}}%
- {\f@ncyhf\fancyhf{}[]}}
-
- % New commands for offsets added
-
- \newcommand{\fancyheadoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyheadoffset h}%
- {\f@ncyhfoffs\fancyheadoffset h[]}}
- \newcommand{\fancyfootoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyfootoffset f}%
- {\f@ncyhfoffs\fancyfootoffset f[]}}
- \newcommand{\fancyhfoffset}{\@ifnextchar[{\f@ncyhfoffs\fancyhfoffset{}}%
- {\f@ncyhfoffs\fancyhfoffset{}[]}}
-
- % The header and footer fields are stored in command sequences with
- % names of the form: \f@ncy<x><y><z> with <x> for [eo], <y> from [lcr]
- % and <z> from [hf].
-
- \def\f@ncyhf#1#2[#3]#4{%
- \def\temp@c{}%
- \@forc\tmpf@ra{#3}%
- {\expandafter\if@in\tmpf@ra{eolcrhf,EOLCRHF}%
- {}{\edef\temp@c{\temp@c\tmpf@ra}}}%
- \ifx\@empty\temp@c\else
- \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
- [#3]}%
- \fi
- \f@nfor\temp@c{#3}%
- {\def@ult\f@@@eo{eo}\temp@c
- \if@twoside\else
- \if\f@@@eo e\@fancywarning
- {\string#1's `E' option without twoside option is useless}\fi\fi
- \def@ult\f@@@lcr{lcr}\temp@c
- \def@ult\f@@@hf{hf}{#2\temp@c}%
- \@forc\f@@eo\f@@@eo
- {\@forc\f@@lcr\f@@@lcr
- {\@forc\f@@hf\f@@@hf
- {\expandafter\fancy@def\csname
- f@ncy\f@@eo\f@@lcr\f@@hf\endcsname
- {#4}}}}}}
-
- \def\f@ncyhfoffs#1#2[#3]#4{%
- \def\temp@c{}%
241
- \@forc\tmpf@ra{#3}%
242
- {\expandafter\if@in\tmpf@ra{eolrhf,EOLRHF}%
243
- {}{\edef\temp@c{\temp@c\tmpf@ra}}}%
244
- \ifx\@empty\temp@c\else
245
- \@fancyerrmsg{Illegal char `\temp@c' in \string#1 argument:
246
- [#3]}%
247
- \fi
248
- \f@nfor\temp@c{#3}%
249
- {\def@ult\f@@@eo{eo}\temp@c
250
- \if@twoside\else
251
- \if\f@@@eo e\@fancywarning
252
- {\string#1's `E' option without twoside option is useless}\fi\fi
253
- \def@ult\f@@@lcr{lr}\temp@c
254
- \def@ult\f@@@hf{hf}{#2\temp@c}%
255
- \@forc\f@@eo\f@@@eo
256
- {\@forc\f@@lcr\f@@@lcr
257
- {\@forc\f@@hf\f@@@hf
258
- {\expandafter\setlength\csname
259
- f@ncyO@\f@@eo\f@@lcr\f@@hf\endcsname
260
- {#4}}}}}%
261
- \fancy@setoffs}
262
-
263
- % Fancyheadings version 1 commands. These are more or less deprecated,
264
- % but they continue to work.
265
-
266
- \newcommand{\lhead}{\@ifnextchar[{\@xlhead}{\@ylhead}}
267
- \def\@xlhead[#1]#2{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#2}}
268
- \def\@ylhead#1{\fancy@def\f@ncyelh{#1}\fancy@def\f@ncyolh{#1}}
269
-
270
- \newcommand{\chead}{\@ifnextchar[{\@xchead}{\@ychead}}
271
- \def\@xchead[#1]#2{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#2}}
272
- \def\@ychead#1{\fancy@def\f@ncyech{#1}\fancy@def\f@ncyoch{#1}}
273
-
274
- \newcommand{\rhead}{\@ifnextchar[{\@xrhead}{\@yrhead}}
275
- \def\@xrhead[#1]#2{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#2}}
276
- \def\@yrhead#1{\fancy@def\f@ncyerh{#1}\fancy@def\f@ncyorh{#1}}
277
-
278
- \newcommand{\lfoot}{\@ifnextchar[{\@xlfoot}{\@ylfoot}}
279
- \def\@xlfoot[#1]#2{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#2}}
280
- \def\@ylfoot#1{\fancy@def\f@ncyelf{#1}\fancy@def\f@ncyolf{#1}}
281
-
282
- \newcommand{\cfoot}{\@ifnextchar[{\@xcfoot}{\@ycfoot}}
283
- \def\@xcfoot[#1]#2{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#2}}
284
- \def\@ycfoot#1{\fancy@def\f@ncyecf{#1}\fancy@def\f@ncyocf{#1}}
285
-
286
- \newcommand{\rfoot}{\@ifnextchar[{\@xrfoot}{\@yrfoot}}
287
- \def\@xrfoot[#1]#2{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#2}}
288
- \def\@yrfoot#1{\fancy@def\f@ncyerf{#1}\fancy@def\f@ncyorf{#1}}
289
-
290
- \newlength{\fancy@headwidth}
291
- \let\headwidth\fancy@headwidth
292
- \newlength{\f@ncyO@elh}
293
- \newlength{\f@ncyO@erh}
294
- \newlength{\f@ncyO@olh}
295
- \newlength{\f@ncyO@orh}
296
- \newlength{\f@ncyO@elf}
297
- \newlength{\f@ncyO@erf}
298
- \newlength{\f@ncyO@olf}
299
- \newlength{\f@ncyO@orf}
300
- \newcommand{\headrulewidth}{0.4pt}
301
- \newcommand{\footrulewidth}{0pt}
302
- \newcommand{\footruleskip}{.3\normalbaselineskip}
303
-
304
- % Fancyplain stuff shouldn't be used anymore (rather
305
- % \fancypagestyle{plain} should be used), but it must be present for
306
- % compatibility reasons.
307
-
308
- \newcommand{\plainheadrulewidth}{0pt}
309
- \newcommand{\plainfootrulewidth}{0pt}
310
- \newif\if@fancyplain \@fancyplainfalse
311
- \def\fancyplain#1#2{\if@fancyplain#1\else#2\fi}
312
-
313
- \headwidth=-123456789sp %magic constant
314
-
315
- % Command to reset various things in the headers:
316
- % a.o. single spacing (taken from setspace.sty)
317
- % and the catcode of ^^M (so that epsf files in the header work if a
318
- % verbatim crosses a page boundary)
319
- % It also defines a \nouppercase command that disables \uppercase and
320
- % \Makeuppercase. It can only be used in the headers and footers.
321
- \let\fnch@everypar\everypar% save real \everypar because of spanish.ldf
322
- \def\fancy@reset{\fnch@everypar{}\restorecr\endlinechar=13
323
- \def\baselinestretch{1}%
324
- \def\nouppercase##1{{\let\uppercase\relax\let\MakeUppercase\relax
325
- \expandafter\let\csname MakeUppercase \endcsname\relax##1}}%
326
- \ifx\undefined\@newbaseline% NFSS not present; 2.09 or 2e
327
- \ifx\@normalsize\undefined \normalsize % for ucthesis.cls
328
- \else \@normalsize \fi
329
- \else% NFSS (2.09) present
330
- \@newbaseline%
331
- \fi}
332
-
333
- % Initialization of the head and foot text.
334
-
335
- % The default values still contain \fancyplain for compatibility.
336
- \fancyhf{} % clear all
337
- % lefthead empty on ``plain'' pages, \rightmark on even, \leftmark on odd pages
338
- % evenhead empty on ``plain'' pages, \leftmark on even, \rightmark on odd pages
339
- \if@twoside
340
- \fancyhead[el,or]{\fancyplain{}{\sl\rightmark}}
341
- \fancyhead[er,ol]{\fancyplain{}{\sl\leftmark}}
342
- \else
343
- \fancyhead[l]{\fancyplain{}{\sl\rightmark}}
344
- \fancyhead[r]{\fancyplain{}{\sl\leftmark}}
345
- \fi
346
- \fancyfoot[c]{\rm\thepage} % page number
347
-
348
- % Use box 0 as a temp box and dimen 0 as temp dimen.
349
- % This can be done, because this code will always
350
- % be used inside another box, and therefore the changes are local.
351
-
352
- \def\@fancyvbox#1#2{\setbox0\vbox{#2}\ifdim\ht0>#1\@fancywarning
353
- {\string#1 is too small (\the#1): ^^J Make it at least \the\ht0.^^J
354
- We now make it that large for the rest of the document.^^J
355
- This may cause the page layout to be inconsistent, however\@gobble}%
356
- \dimen0=#1\global\setlength{#1}{\ht0}\ht0=\dimen0\fi
357
- \box0}
358
-
359
- % Put together a header or footer given the left, center and
360
- % right text, fillers at left and right and a rule.
361
- % The \lap commands put the text into an hbox of zero size,
362
- % so overlapping text does not generate an errormessage.
363
- % These macros have 5 parameters:
364
- % 1. LEFTSIDE BEARING % This determines at which side the header will stick
365
- % out. When \fancyhfoffset is used this calculates \headwidth, otherwise
366
- % it is \hss or \relax (after expansion).
367
- % 2. \f@ncyolh, \f@ncyelh, \f@ncyolf or \f@ncyelf. This is the left component.
368
- % 3. \f@ncyoch, \f@ncyech, \f@ncyocf or \f@ncyecf. This is the middle comp.
369
- % 4. \f@ncyorh, \f@ncyerh, \f@ncyorf or \f@ncyerf. This is the right component.
370
- % 5. RIGHTSIDE BEARING. This is always \relax or \hss (after expansion).
371
-
372
- \def\@fancyhead#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
373
- \@fancyvbox\headheight{\hbox
374
- {\rlap{\parbox[b]{\headwidth}{\raggedright#2}}\hfill
375
- \parbox[b]{\headwidth}{\centering#3}\hfill
376
- \llap{\parbox[b]{\headwidth}{\raggedleft#4}}}\headrule}}#5}
377
-
378
- \def\@fancyfoot#1#2#3#4#5{#1\hbox to\headwidth{\fancy@reset
379
- \@fancyvbox\footskip{\footrule
380
- \hbox{\rlap{\parbox[t]{\headwidth}{\raggedright#2}}\hfill
381
- \parbox[t]{\headwidth}{\centering#3}\hfill
382
- \llap{\parbox[t]{\headwidth}{\raggedleft#4}}}}}#5}
383
-
384
- \def\headrule{{\if@fancyplain\let\headrulewidth\plainheadrulewidth\fi
385
- \hrule\@height\headrulewidth\@width\headwidth \vskip-\headrulewidth}}
386
-
387
- \def\footrule{{\if@fancyplain\let\footrulewidth\plainfootrulewidth\fi
388
- \vskip-\footruleskip\vskip-\footrulewidth
389
- \hrule\@width\headwidth\@height\footrulewidth\vskip\footruleskip}}
390
-
391
- \def\ps@fancy{%
392
- \@ifundefined{@chapapp}{\let\@chapapp\chaptername}{}%for amsbook
393
- %
394
- % Define \MakeUppercase for old LaTeXen.
395
- % Note: we used \def rather than \let, so that \let\uppercase\relax (from
396
- % the version 1 documentation) will still work.
397
- %
398
- \@ifundefined{MakeUppercase}{\def\MakeUppercase{\uppercase}}{}%
399
- \@ifundefined{chapter}{\def\sectionmark##1{\markboth
400
- {\MakeUppercase{\ifnum \c@secnumdepth>\z@
401
- \thesection\hskip 1em\relax \fi ##1}}{}}%
402
- \def\subsectionmark##1{\markright {\ifnum \c@secnumdepth >\@ne
403
- \thesubsection\hskip 1em\relax \fi ##1}}}%
404
- {\def\chaptermark##1{\markboth {\MakeUppercase{\ifnum \c@secnumdepth>\m@ne
405
- \@chapapp\ \thechapter. \ \fi ##1}}{}}%
406
- \def\sectionmark##1{\markright{\MakeUppercase{\ifnum \c@secnumdepth >\z@
407
- \thesection. \ \fi ##1}}}}%
408
- %\csname ps@headings\endcsname % use \ps@headings defaults if they exist
409
- \ps@@fancy
410
- \gdef\ps@fancy{\@fancyplainfalse\ps@@fancy}%
411
- % Initialize \headwidth if the user didn't
412
- %
413
- \ifdim\headwidth<0sp
414
- %
415
- % This catches the case that \headwidth hasn't been initialized and the
416
- % case that the user added something to \headwidth in the expectation that
417
- % it was initialized to \textwidth. We compensate this now. This loses if
418
- % the user intended to multiply it by a factor. But that case is more
419
- % likely done by saying something like \headwidth=1.2\textwidth.
420
- % The doc says you have to change \headwidth after the first call to
421
- % \pagestyle{fancy}. This code is just to catch the most common cases where
422
- % that requirement is violated.
423
- %
424
- \global\advance\headwidth123456789sp\global\advance\headwidth\textwidth
425
- \fi}
426
- \def\ps@fancyplain{\ps@fancy \let\ps@plain\ps@plain@fancy}
427
- \def\ps@plain@fancy{\@fancyplaintrue\ps@@fancy}
428
- \let\ps@@empty\ps@empty
429
- \def\ps@@fancy{%
430
- \ps@@empty % This is for amsbook/amsart, which do strange things with \topskip
431
- \def\@mkboth{\protect\markboth}%
432
- \def\@oddhead{\@fancyhead\fancy@Oolh\f@ncyolh\f@ncyoch\f@ncyorh\fancy@Oorh}%
433
- \def\@oddfoot{\@fancyfoot\fancy@Oolf\f@ncyolf\f@ncyocf\f@ncyorf\fancy@Oorf}%
434
- \def\@evenhead{\@fancyhead\fancy@Oelh\f@ncyelh\f@ncyech\f@ncyerh\fancy@Oerh}%
435
- \def\@evenfoot{\@fancyfoot\fancy@Oelf\f@ncyelf\f@ncyecf\f@ncyerf\fancy@Oerf}%
436
- }
437
- % Default definitions for compatibility mode:
438
- % These cause the header/footer to take the defined \headwidth as width
439
- % And to shift in the direction of the marginpar area
440
-
441
- \def\fancy@Oolh{\if@reversemargin\hss\else\relax\fi}
442
- \def\fancy@Oorh{\if@reversemargin\relax\else\hss\fi}
443
- \let\fancy@Oelh\fancy@Oorh
444
- \let\fancy@Oerh\fancy@Oolh
445
-
446
- \let\fancy@Oolf\fancy@Oolh
447
- \let\fancy@Oorf\fancy@Oorh
448
- \let\fancy@Oelf\fancy@Oelh
449
- \let\fancy@Oerf\fancy@Oerh
450
-
451
- % New definitions for the use of \fancyhfoffset
452
- % These calculate the \headwidth from \textwidth and the specified offsets.
453
-
454
- \def\fancy@offsolh{\headwidth=\textwidth\advance\headwidth\f@ncyO@olh
455
- \advance\headwidth\f@ncyO@orh\hskip-\f@ncyO@olh}
456
- \def\fancy@offselh{\headwidth=\textwidth\advance\headwidth\f@ncyO@elh
457
- \advance\headwidth\f@ncyO@erh\hskip-\f@ncyO@elh}
458
-
459
- \def\fancy@offsolf{\headwidth=\textwidth\advance\headwidth\f@ncyO@olf
460
- \advance\headwidth\f@ncyO@orf\hskip-\f@ncyO@olf}
461
- \def\fancy@offself{\headwidth=\textwidth\advance\headwidth\f@ncyO@elf
462
- \advance\headwidth\f@ncyO@erf\hskip-\f@ncyO@elf}
463
-
464
- \def\fancy@setoffs{%
465
- % Just in case \let\headwidth\textwidth was used
466
- \fancy@gbl\let\headwidth\fancy@headwidth
467
- \fancy@gbl\let\fancy@Oolh\fancy@offsolh
468
- \fancy@gbl\let\fancy@Oelh\fancy@offselh
469
- \fancy@gbl\let\fancy@Oorh\hss
470
- \fancy@gbl\let\fancy@Oerh\hss
471
- \fancy@gbl\let\fancy@Oolf\fancy@offsolf
472
- \fancy@gbl\let\fancy@Oelf\fancy@offself
473
- \fancy@gbl\let\fancy@Oorf\hss
474
- \fancy@gbl\let\fancy@Oerf\hss}
475
-
476
- \newif\iffootnote
477
- \let\latex@makecol\@makecol
478
- \def\@makecol{\ifvoid\footins\footnotetrue\else\footnotefalse\fi
479
- \let\topfloat\@toplist\let\botfloat\@botlist\latex@makecol}
480
- \def\iftopfloat#1#2{\ifx\topfloat\empty #2\else #1\fi}
481
- \def\ifbotfloat#1#2{\ifx\botfloat\empty #2\else #1\fi}
482
- \def\iffloatpage#1#2{\if@fcolmade #1\else #2\fi}
483
-
484
- \newcommand{\fancypagestyle}[2]{%
485
- \@namedef{ps@#1}{\let\fancy@gbl\relax#2\relax\ps@fancy}}
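For reference, a minimal sketch of how the commands documented in the deleted file above are typically combined in a document preamble. This sketch is not part of the deleted file or of this repository; it assumes a plain article-class document and standard fancyhdr behaviour.

\documentclass{article}
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{}                           % clear all six header/footer fields
\fancyhead[L]{\slshape\leftmark}     % current section title on the left
\fancyhead[R]{\thepage}              % page number on the right
\fancyfoot[C]{Preliminary draft}     % centered footer text
\renewcommand{\headrulewidth}{0.4pt} % macros, not lengths (see changelog above)
\renewcommand{\footrulewidth}{0pt}
% \fancyheadoffset[L]{2cm}           % optional: let the header stick into the left margin
\fancypagestyle{plain}{\fancyhf{}\fancyfoot[C]{\thepage}} % e.g. for \maketitle pages
\begin{document}
\section{Example}
Body text.
\end{document}

Because \headrulewidth and \footrulewidth are macros rather than length registers (as the changelog notes), changing them with \setlength would trigger an error message; \renewcommand is the intended interface.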
 
outputs/outputs_20230421_012207/generation.log DELETED
@@ -1,105 +0,0 @@
1
- INFO:utils.gpt_interaction:{"Reinforcement Learning": 5, "Q-Learning": 4, "Policy Gradient": 4, "Deep Reinforcement Learning": 3, "Multi-Agent Systems": 2}
2
- INFO:root:For generating keywords, 120 tokens have been used (79 for prompts; 41 for completion). 120 tokens have been used in total.
3
- INFO:utils.prompts:Generated prompts for introduction: I am writing a machine learning survey about 'Reinforcement Learning'.
4
- You need to write the introduction section. Please include five paragraph: Establishing the motivation for the research. Explaining its importance and relevance to the AI community. Clearly state the problem you're addressing, your proposed solution, and the specific research questions or objectives. Briefly mention key related work for context. Explain the main differences from your work.
5
- Please read the following references:
6
- {'2001.09608': ' A lifelong reinforcement learning system is a learning system that has the\nability to learn through trail-and-error interaction with the environment over\nits lifetime. In this paper, I give some arguments to show that the traditional\nreinforcement learning paradigm fails to model this type of learning system.\nSome insights into lifelong reinforcement learning are provided, along with a\nsimplistic prototype lifelong reinforcement learning system.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2202.05135': ' It can largely benefit the reinforcement learning process of each agent if\nmultiple agents perform their separate reinforcement learning tasks\ncooperatively. Different from multi-agent reinforcement learning where multiple\nagents are in a common environment and should learn to cooperate or compete\nwith each other, in this case each agent has its separate environment and only\ncommunicate with others to share knowledge without any cooperative or\ncompetitive behaviour as a learning outcome. In fact, this learning scenario is\nnot well understood yet and not well formulated. As the first effort, we\npropose group-agent reinforcement learning as a formulation of this scenario\nand the third type of reinforcement learning problem with respect to\nsingle-agent and multi-agent reinforcement learning. We then propose the first\ndistributed reinforcement learning framework called DDAL (Decentralised\nDistributed Asynchronous Learning) designed for group-agent reinforcement\nlearning. We show through experiments that DDAL achieved desirable performance\nwith very stable training and has good scalability.\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. 
Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '2009.07888': ' Reinforcement learning is a learning paradigm for solving sequential\ndecision-making problems. Recent years have witnessed remarkable progress in\nreinforcement learning upon the fast development of deep neural networks. Along\nwith the promising prospects of reinforcement learning in numerous domains such\nas robotics and game-playing, transfer learning has arisen to tackle various\nchallenges faced by reinforcement learning, by transferring knowledge from\nexternal expertise to facilitate the efficiency and effectiveness of the\nlearning process. In this survey, we systematically investigate the recent\nprogress of transfer learning approaches in the context of deep reinforcement\nlearning. Specifically, we provide a framework for categorizing the\nstate-of-the-art transfer learning approaches, under which we analyze their\ngoals, methodologies, compatible reinforcement learning backbones, and\npractical applications. We also draw connections between transfer learning and\nother relevant topics from the reinforcement learning perspective and explore\ntheir potential challenges that await future research progress.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. 
Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. 
This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. 
Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1009.2998': ' The boundedness tests for the number of compact integral manifolds of\nautonomous ordinary differential systems, of autonomous total differential\nsystems, of linear systems of partial differential equations, of Pfaff systems\nof equations, and of systems of exterior differential equations are proved.\n', '1911.09048': ' This thesis (defended 10/07/2019) develops a theory of networks of hybrid\nopen systems and morphisms. It builds upon a framework of networks of\ncontinuous-time open systems as product and interconnection. We work out\ncategorical notions for hybrid systems, deterministic hybrid systems, hybrid\nopen systems, networks of hybrid open systems, and morphisms of networks of\nhybrid open systems.\n We also develop categorical notions for abstract systems, abstract open\nsystems, networks of abstract open systems, and morphisms of networks of\nabstract open systems. We show that a collection of relations holding among\npairs of systems induces a relation between interconnected systems. We use this\nresult for abstract systems to prove a corresponding result for networks of\nhybrid systems.\n This result translates as saying that our procedure for building networks\npreserves morphisms of open systems: a collection of morphisms of (sub)systems\nis sent to a morphism of networked systems. We thus both justify our formalism\nand concretize the intuition that a network is a collection of systems pieced\ntogether in a certain way.\n'}
7
- Every time you use information from the references, you need to cite its id after the sentence; for example, the sentence where you use information from 1905.09788 \cite{1905.09788}. Please avoid citing the same reference in the same paragraph.
8
- Put your response (do not include \section{...}) in the following Python script:with open("introduction.tex", "w") as f: f.write(r'''your_response''')
9
- INFO:utils.gpt_interaction:Reinforcement Learning (RL) has emerged as a powerful learning paradigm for solving sequential decision-making problems, with significant advancements made in recent years due to the integration of deep neural networks \cite{2108.11510}. As a result, deep reinforcement learning has demonstrated remarkable success in various domains, including finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. However, traditional RL paradigms face challenges in modeling lifelong learning systems, which learn through trial-and-error interactions with the environment over their lifetime \cite{2001.09608}. Moreover, data inefficiency caused by trial-and-error learning mechanisms makes deep RL difficult to apply in a wide range of areas \cite{2212.00253}. This survey aims to address these challenges by exploring recent advancements in reinforcement learning, focusing on the development of more efficient and effective learning algorithms.
10
-
11
- The problem we address is the development of more efficient and effective reinforcement learning algorithms that can learn from trial-and-error interactions with the environment, while also being able to transfer knowledge from external expertise to facilitate the learning process \cite{2009.07888}. Our proposed solution involves investigating recent advancements in RL, such as deep RL in computer vision \cite{2108.11510}, group-agent reinforcement learning \cite{2202.05135}, and distributed deep reinforcement learning \cite{2212.00253}. We aim to answer the following research questions: (1) How can we improve the efficiency and effectiveness of reinforcement learning algorithms? (2) What are the key advancements in RL that can be leveraged to address the challenges faced by traditional RL paradigms?
12
-
13
- Related work in the field of reinforcement learning includes the development of algorithms such as Q-learning, Double Q-learning, and Dueling Q-learning \cite{2106.14642, 2106.01134, 2012.01100}. Additionally, transfer learning approaches have been explored to tackle various challenges faced by RL, by transferring knowledge from external expertise to facilitate the learning process \cite{2009.07888}. Furthermore, recent research has focused on the development of distributed deep RL algorithms, which have shown potential in various applications such as human-computer gaming and intelligent transportation \cite{2212.00253}.
14
-
15
- Our work differs from the existing literature in that we aim to provide a comprehensive survey of the recent advancements in reinforcement learning, focusing on the development of more efficient and effective learning algorithms. By investigating various RL techniques and methodologies, we hope to identify key advancements that can be leveraged to address the challenges faced by traditional RL paradigms. Moreover, our survey will not only discuss the algorithms themselves but also explore their applications in various domains, providing a more in-depth understanding of the potential impact of these advancements on the AI community.
16
-
17
- In conclusion, this survey will provide a detailed overview of recent advancements in reinforcement learning, with a focus on addressing the challenges faced by traditional RL paradigms and improving the efficiency and effectiveness of learning algorithms. By investigating various RL techniques and methodologies, we aim to identify key advancements that can be leveraged to address these challenges and contribute to the ongoing development of reinforcement learning as a powerful learning paradigm for solving sequential decision-making problems in various domains.
18
- INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message.
19
-
20
- INFO:utils.gpt_interaction:assistant_message: Reinforcement Learning (RL) has emerged as a powerful learning paradigm for solving sequential decision-making problems, with significant advancements made in recent years due to the integration of deep neural networks \cite{2108.11510}. As a result, deep reinforcement learning has demonstrated remarkable success in various domains, including finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. However, traditional RL paradigms face challenges in modeling lifelong learning systems, which learn through trial-and-error interactions with the environment over their lifetime \cite{2001.09608}. Moreover, data inefficiency caused by trial-and-error learning mechanisms makes deep RL difficult to apply in a wide range of areas \cite{2212.00253}. This survey aims to address these challenges by exploring recent advancements in reinforcement learning, focusing on the development of more efficient and effective learning algorithms.
21
-
22
- The problem we address is the development of more efficient and effective reinforcement learning algorithms that can learn from trial-and-error interactions with the environment, while also being able to transfer knowledge from external expertise to facilitate the learning process \cite{2009.07888}. Our proposed solution involves investigating recent advancements in RL, such as deep RL in computer vision \cite{2108.11510}, group-agent reinforcement learning \cite{2202.05135}, and distributed deep reinforcement learning \cite{2212.00253}. We aim to answer the following research questions: (1) How can we improve the efficiency and effectiveness of reinforcement learning algorithms? (2) What are the key advancements in RL that can be leveraged to address the challenges faced by traditional RL paradigms?
23
-
24
- Related work in the field of reinforcement learning includes the development of algorithms such as Q-learning, Double Q-learning, and Dueling Q-learning \cite{2106.14642, 2106.01134, 2012.01100}. Additionally, transfer learning approaches have been explored to tackle various challenges faced by RL, by transferring knowledge from external expertise to facilitate the learning process \cite{2009.07888}. Furthermore, recent research has focused on the development of distributed deep RL algorithms, which have shown potential in various applications such as human-computer gaming and intelligent transportation \cite{2212.00253}.
25
-
26
- Our work differs from the existing literature in that we aim to provide a comprehensive survey of the recent advancements in reinforcement learning, focusing on the development of more efficient and effective learning algorithms. By investigating various RL techniques and methodologies, we hope to identify key advancements that can be leveraged to address the challenges faced by traditional RL paradigms. Moreover, our survey will not only discuss the algorithms themselves but also explore their applications in various domains, providing a more in-depth understanding of the potential impact of these advancements on the AI community.
27
-
28
- In conclusion, this survey will provide a detailed overview of recent advancements in reinforcement learning, with a focus on addressing the challenges faced by traditional RL paradigms and improving the efficiency and effectiveness of learning algorithms. By investigating various RL techniques and methodologies, we aim to identify key advancements that can be leveraged to address these challenges and contribute to the ongoing development of reinforcement learning as a powerful learning paradigm for solving sequential decision-making problems in various domains.
29
- INFO:root:For generating introduction, 3914 tokens have been used (3257 for prompts; 657 for completion). 4034 tokens have been used in total.
30
- INFO:utils.prompts:Generated prompts for related works: I am writing a machine learning survey about 'Reinforcement Learning'.
31
- You need to write the related works section. Please discuss key publications, methods, and techniques in your research area. Analyze the strengths and weaknesses of existing methods, and present the related works in a logical manner, often chronologically. Consider using a taxonomy or categorization to structure the discussion. Do not use \section{...} or \subsection{...}; use \paragraph{...} instead.
32
- Please read the following references:
33
- {'2001.09608': ' A lifelong reinforcement learning system is a learning system that has the\nability to learn through trail-and-error interaction with the environment over\nits lifetime. In this paper, I give some arguments to show that the traditional\nreinforcement learning paradigm fails to model this type of learning system.\nSome insights into lifelong reinforcement learning are provided, along with a\nsimplistic prototype lifelong reinforcement learning system.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2202.05135': ' It can largely benefit the reinforcement learning process of each agent if\nmultiple agents perform their separate reinforcement learning tasks\ncooperatively. Different from multi-agent reinforcement learning where multiple\nagents are in a common environment and should learn to cooperate or compete\nwith each other, in this case each agent has its separate environment and only\ncommunicate with others to share knowledge without any cooperative or\ncompetitive behaviour as a learning outcome. In fact, this learning scenario is\nnot well understood yet and not well formulated. As the first effort, we\npropose group-agent reinforcement learning as a formulation of this scenario\nand the third type of reinforcement learning problem with respect to\nsingle-agent and multi-agent reinforcement learning. We then propose the first\ndistributed reinforcement learning framework called DDAL (Decentralised\nDistributed Asynchronous Learning) designed for group-agent reinforcement\nlearning. We show through experiments that DDAL achieved desirable performance\nwith very stable training and has good scalability.\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. 
Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '2009.07888': ' Reinforcement learning is a learning paradigm for solving sequential\ndecision-making problems. Recent years have witnessed remarkable progress in\nreinforcement learning upon the fast development of deep neural networks. Along\nwith the promising prospects of reinforcement learning in numerous domains such\nas robotics and game-playing, transfer learning has arisen to tackle various\nchallenges faced by reinforcement learning, by transferring knowledge from\nexternal expertise to facilitate the efficiency and effectiveness of the\nlearning process. In this survey, we systematically investigate the recent\nprogress of transfer learning approaches in the context of deep reinforcement\nlearning. Specifically, we provide a framework for categorizing the\nstate-of-the-art transfer learning approaches, under which we analyze their\ngoals, methodologies, compatible reinforcement learning backbones, and\npractical applications. We also draw connections between transfer learning and\nother relevant topics from the reinforcement learning perspective and explore\ntheir potential challenges that await future research progress.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. 
Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. 
This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. 
Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1009.2998': ' The boundedness tests for the number of compact integral manifolds of\nautonomous ordinary differential systems, of autonomous total differential\nsystems, of linear systems of partial differential equations, of Pfaff systems\nof equations, and of systems of exterior differential equations are proved.\n', '1911.09048': ' This thesis (defended 10/07/2019) develops a theory of networks of hybrid\nopen systems and morphisms. It builds upon a framework of networks of\ncontinuous-time open systems as product and interconnection. We work out\ncategorical notions for hybrid systems, deterministic hybrid systems, hybrid\nopen systems, networks of hybrid open systems, and morphisms of networks of\nhybrid open systems.\n We also develop categorical notions for abstract systems, abstract open\nsystems, networks of abstract open systems, and morphisms of networks of\nabstract open systems. We show that a collection of relations holding among\npairs of systems induces a relation between interconnected systems. We use this\nresult for abstract systems to prove a corresponding result for networks of\nhybrid systems.\n This result translates as saying that our procedure for building networks\npreserves morphisms of open systems: a collection of morphisms of (sub)systems\nis sent to a morphism of networked systems. We thus both justify our formalism\nand concretize the intuition that a network is a collection of systems pieced\ntogether in a certain way.\n'}
34
- Every time you use information from the references, you need to cite its id after the sentence; for example, the sentence where you use information from 1905.09788 \cite{1905.09788}. Please avoid citing the same reference in the same paragraph.
35
- Put your response (do not include \section{...}) in the following Python script:with open("related works.tex", "w") as f: f.write(r'''your_response''')
36
- INFO:utils.gpt_interaction:\paragraph{Reinforcement Learning and Q-Learning}
37
- Reinforcement learning is a learning paradigm for solving sequential decision-making problems, and Q-learning is one of its fundamental algorithms \cite{2009.07888}. The Q-learning algorithm, however, is known to suffer from maximization bias, which leads to the overestimation of action values \cite{2012.01100}. To address this issue, Double Q-learning has been proposed, which mitigates the overestimation problem but may result in slower convergence and increased memory requirements \cite{2303.08631}. Another approach to tackle the maximization bias is Self-correcting Q-learning, which balances the overestimation and underestimation issues while maintaining similar convergence guarantees as Q-learning \cite{2012.01100}.
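A minimal numerical sketch of the maximization bias mentioned above (the toy setup is an assumption for illustration, not taken from the cited papers): every action's true value is zero, yet the single estimator used by standard Q-learning (the max over noisy sample means) is biased upward, while the double estimator used by Double Q-learning is not.

import numpy as np

rng = np.random.default_rng(0)
n_actions, n_samples, n_trials = 10, 20, 5000
single, double = [], []
for _ in range(n_trials):
    # Noisy reward samples; the true value of every action is 0.
    rewards = rng.normal(0.0, 1.0, size=(n_actions, n_samples))
    # Single estimator: max over sample means (overestimates the true max of 0).
    single.append(rewards.mean(axis=1).max())
    # Double estimator: pick the argmax on one half, evaluate it on the other half.
    half_a = rewards[:, : n_samples // 2].mean(axis=1)
    half_b = rewards[:, n_samples // 2 :].mean(axis=1)
    double.append(half_b[half_a.argmax()])
print(f"single-estimator bias: {np.mean(single):+.3f}")  # noticeably positive
print(f"double-estimator bias: {np.mean(double):+.3f}")  # close to zero or slightly negative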
38
-
39
- \paragraph{Deep Reinforcement Learning}
40
- Deep reinforcement learning (DRL) combines reinforcement learning with deep neural networks to tackle more complex problems \cite{2108.11510}. DRL has been successfully applied in various domains, including computer vision, where it has been used for tasks such as landmark localization, object detection, object tracking, image registration, image segmentation, and video analysis \cite{2108.11510}. Despite its success, DRL suffers from data inefficiency due to its trial-and-error learning mechanism, leading to the development of various sample-efficient methods, such as distributed deep reinforcement learning \cite{2212.00253}.
41
-
42
- \paragraph{Transfer Learning in Reinforcement Learning}
43
- Transfer learning has emerged as a promising approach to address the challenges faced by reinforcement learning, such as data inefficiency, by transferring knowledge from external sources to facilitate the learning process \cite{2009.07888}. A systematic investigation of transfer learning approaches in the context of deep reinforcement learning has been conducted, categorizing these approaches based on their goals, methodologies, compatible reinforcement learning backbones, and practical applications \cite{2009.07888}.
44
-
45
- \paragraph{Policy Gradient Methods}
46
- Policy gradient methods are widely used in reinforcement learning, particularly for continuous action settings. Natural policy gradients have been proposed as a more efficient alternative to traditional policy gradients, forming the foundation of contemporary reinforcement learning algorithms, such as Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. Off-policy policy gradient methods have also been developed, with the introduction of Actor Critic with Emphatic weightings (ACE), which addresses the issues of previous off-policy policy gradient methods like OffPAC and DPG \cite{1811.09013}.
47
-
48
- \paragraph{Group-Agent Reinforcement Learning}
49
- Group-agent reinforcement learning has been proposed as a new type of reinforcement learning problem, distinct from single-agent and multi-agent reinforcement learning \cite{2202.05135}. In this scenario, multiple agents perform separate reinforcement learning tasks cooperatively, sharing knowledge without any cooperative or competitive behavior as a learning outcome. The Decentralised Distributed Asynchronous Learning (DDAL) framework has been introduced as the first distributed reinforcement learning framework designed for group-agent reinforcement learning, showing desirable performance and good scalability \cite{2202.05135}.
50
- INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message.
51
-
52
- INFO:utils.gpt_interaction:assistant_message: \paragraph{Reinforcement Learning and Q-Learning}
53
- Reinforcement learning is a learning paradigm for solving sequential decision-making problems, and Q-learning is one of its fundamental algorithms \cite{2009.07888}. The Q-learning algorithm, however, is known to suffer from maximization bias, which leads to the overestimation of action values \cite{2012.01100}. To address this issue, Double Q-learning has been proposed, which mitigates the overestimation problem but may result in slower convergence and increased memory requirements \cite{2303.08631}. Another approach to tackle the maximization bias is Self-correcting Q-learning, which balances the overestimation and underestimation issues while maintaining similar convergence guarantees as Q-learning \cite{2012.01100}.
54
-
55
- \paragraph{Deep Reinforcement Learning}
56
- Deep reinforcement learning (DRL) combines reinforcement learning with deep neural networks to tackle more complex problems \cite{2108.11510}. DRL has been successfully applied in various domains, including computer vision, where it has been used for tasks such as landmark localization, object detection, object tracking, image registration, image segmentation, and video analysis \cite{2108.11510}. Despite its success, DRL suffers from data inefficiency due to its trial-and-error learning mechanism, leading to the development of various sample-efficient methods, such as distributed deep reinforcement learning \cite{2212.00253}.
57
-
58
- \paragraph{Transfer Learning in Reinforcement Learning}
59
- Transfer learning has emerged as a promising approach to address the challenges faced by reinforcement learning, such as data inefficiency, by transferring knowledge from external sources to facilitate the learning process \cite{2009.07888}. A systematic investigation of transfer learning approaches in the context of deep reinforcement learning has been conducted, categorizing these approaches based on their goals, methodologies, compatible reinforcement learning backbones, and practical applications \cite{2009.07888}.
60
-
61
- \paragraph{Policy Gradient Methods}
62
- Policy gradient methods are widely used in reinforcement learning, particularly for continuous action settings. Natural policy gradients have been proposed as a more efficient alternative to traditional policy gradients, forming the foundation of contemporary reinforcement learning algorithms, such as Trust Region Policy Optimization (TRPO) and Proximal Policy Optimization (PPO) \cite{2209.01820}. Off-policy policy gradient methods have also been developed, with the introduction of Actor Critic with Emphatic weightings (ACE), which addresses the issues of previous off-policy policy gradient methods like OffPAC and DPG \cite{1811.09013}.
63
-
64
- \paragraph{Group-Agent Reinforcement Learning}
65
- Group-agent reinforcement learning has been proposed as a new type of reinforcement learning problem, distinct from single-agent and multi-agent reinforcement learning \cite{2202.05135}. In this scenario, multiple agents perform separate reinforcement learning tasks cooperatively, sharing knowledge without any cooperative or competitive behavior as a learning outcome. The Decentralised Distributed Asynchronous Learning (DDAL) framework has been introduced as the first distributed reinforcement learning framework designed for group-agent reinforcement learning, showing desirable performance and good scalability \cite{2202.05135}.
66
- INFO:root:For generating related works, 3912 tokens have been used (3269 for prompts; 643 for completion). 7946 tokens have been used in total.
67
- INFO:utils.prompts:Generated prompts for backgrounds: I am writing a machine learning survey about 'Reinforcement Learning'.
68
- You need to write the backgrounds section. Please clearly state the central problem in this field. Explain the foundational theories, concepts, and principles that underpin your research using as many as mathematical formulas or equations (written in LaTeX). Introduce any necessary mathematical notations, equations, or algorithms that are central to this field (written them in LaTeX). Do not include \section{...} but you can have \subsection{...}.
69
- Please read the following references:
70
- {'2001.09608': ' A lifelong reinforcement learning system is a learning system that has the\nability to learn through trail-and-error interaction with the environment over\nits lifetime. In this paper, I give some arguments to show that the traditional\nreinforcement learning paradigm fails to model this type of learning system.\nSome insights into lifelong reinforcement learning are provided, along with a\nsimplistic prototype lifelong reinforcement learning system.\n', '2108.11510': ' Deep reinforcement learning augments the reinforcement learning framework and\nutilizes the powerful representation of deep neural networks. Recent works have\ndemonstrated the remarkable successes of deep reinforcement learning in various\ndomains including finance, medicine, healthcare, video games, robotics, and\ncomputer vision. In this work, we provide a detailed review of recent and\nstate-of-the-art research advances of deep reinforcement learning in computer\nvision. We start with comprehending the theories of deep learning,\nreinforcement learning, and deep reinforcement learning. We then propose a\ncategorization of deep reinforcement learning methodologies and discuss their\nadvantages and limitations. In particular, we divide deep reinforcement\nlearning into seven main categories according to their applications in computer\nvision, i.e. (i)landmark localization (ii) object detection; (iii) object\ntracking; (iv) registration on both 2D image and 3D image volumetric data (v)\nimage segmentation; (vi) videos analysis; and (vii) other applications. Each of\nthese categories is further analyzed with reinforcement learning techniques,\nnetwork design, and performance. Moreover, we provide a comprehensive analysis\nof the existing publicly available datasets and examine source code\navailability. Finally, we present some open issues and discuss future research\ndirections on deep reinforcement learning in computer vision\n', '2202.05135': ' It can largely benefit the reinforcement learning process of each agent if\nmultiple agents perform their separate reinforcement learning tasks\ncooperatively. Different from multi-agent reinforcement learning where multiple\nagents are in a common environment and should learn to cooperate or compete\nwith each other, in this case each agent has its separate environment and only\ncommunicate with others to share knowledge without any cooperative or\ncompetitive behaviour as a learning outcome. In fact, this learning scenario is\nnot well understood yet and not well formulated. As the first effort, we\npropose group-agent reinforcement learning as a formulation of this scenario\nand the third type of reinforcement learning problem with respect to\nsingle-agent and multi-agent reinforcement learning. We then propose the first\ndistributed reinforcement learning framework called DDAL (Decentralised\nDistributed Asynchronous Learning) designed for group-agent reinforcement\nlearning. We show through experiments that DDAL achieved desirable performance\nwith very stable training and has good scalability.\n', '2212.00253': ' With the breakthrough of AlphaGo, deep reinforcement learning becomes a\nrecognized technique for solving sequential decision-making problems. Despite\nits reputation, data inefficiency caused by its trial and error learning\nmechanism makes deep reinforcement learning hard to be practical in a wide\nrange of areas. 
Plenty of methods have been developed for sample efficient deep\nreinforcement learning, such as environment modeling, experience transfer, and\ndistributed modifications, amongst which, distributed deep reinforcement\nlearning has shown its potential in various applications, such as\nhuman-computer gaming, and intelligent transportation. In this paper, we\nconclude the state of this exciting field, by comparing the classical\ndistributed deep reinforcement learning methods, and studying important\ncomponents to achieve efficient distributed learning, covering single player\nsingle agent distributed deep reinforcement learning to the most complex\nmultiple players multiple agents distributed deep reinforcement learning.\nFurthermore, we review recently released toolboxes that help to realize\ndistributed deep reinforcement learning without many modifications of their\nnon-distributed versions. By analyzing their strengths and weaknesses, a\nmulti-player multi-agent distributed deep reinforcement learning toolbox is\ndeveloped and released, which is further validated on Wargame, a complex\nenvironment, showing usability of the proposed toolbox for multiple players and\nmultiple agents distributed deep reinforcement learning under complex games.\nFinally, we try to point out challenges and future trends, hoping this brief\nreview can provide a guide or a spark for researchers who are interested in\ndistributed deep reinforcement learning.\n', '2009.07888': ' Reinforcement learning is a learning paradigm for solving sequential\ndecision-making problems. Recent years have witnessed remarkable progress in\nreinforcement learning upon the fast development of deep neural networks. Along\nwith the promising prospects of reinforcement learning in numerous domains such\nas robotics and game-playing, transfer learning has arisen to tackle various\nchallenges faced by reinforcement learning, by transferring knowledge from\nexternal expertise to facilitate the efficiency and effectiveness of the\nlearning process. In this survey, we systematically investigate the recent\nprogress of transfer learning approaches in the context of deep reinforcement\nlearning. Specifically, we provide a framework for categorizing the\nstate-of-the-art transfer learning approaches, under which we analyze their\ngoals, methodologies, compatible reinforcement learning backbones, and\npractical applications. We also draw connections between transfer learning and\nother relevant topics from the reinforcement learning perspective and explore\ntheir potential challenges that await future research progress.\n', '2303.08631': ' In Reinforcement Learning the Q-learning algorithm provably converges to the\noptimal solution. However, as others have demonstrated, Q-learning can also\noverestimate the values and thereby spend too long exploring unhelpful states.\nDouble Q-learning is a provably convergent alternative that mitigates some of\nthe overestimation issues, though sometimes at the expense of slower\nconvergence. We introduce an alternative algorithm that replaces the max\noperation with an average, resulting also in a provably convergent off-policy\nalgorithm which can mitigate overestimation yet retain similar convergence as\nstandard Q-learning.\n', '2106.14642': ' In this article, we propose a novel algorithm for deep reinforcement learning\nnamed Expert Q-learning. 
Expert Q-learning is inspired by Dueling Q-learning\nand aims at incorporating semi-supervised learning into reinforcement learning\nthrough splitting Q-values into state values and action advantages. We require\nthat an offline expert assesses the value of a state in a coarse manner using\nthree discrete values. An expert network is designed in addition to the\nQ-network, which updates each time following the regular offline minibatch\nupdate whenever the expert example buffer is not empty. Using the board game\nOthello, we compare our algorithm with the baseline Q-learning algorithm, which\nis a combination of Double Q-learning and Dueling Q-learning. Our results show\nthat Expert Q-learning is indeed useful and more resistant to the\noverestimation bias. The baseline Q-learning algorithm exhibits unstable and\nsuboptimal behavior in non-deterministic settings, whereas Expert Q-learning\ndemonstrates more robust performance with higher scores, illustrating that our\nalgorithm is indeed suitable to integrate state values from expert examples\ninto Q-learning.\n', '2106.01134': ' An improvement of Q-learning is proposed in this paper. It is different from\nclassic Q-learning in that the similarity between different states and actions\nis considered in the proposed method. During the training, a new updating\nmechanism is used, in which the Q value of the similar state-action pairs are\nupdated synchronously. The proposed method can be used in combination with both\ntabular Q-learning function and deep Q-learning. And the results of numerical\nexamples illustrate that compared to the classic Q-learning, the proposed\nmethod has a significantly better performance.\n', '2012.01100': ' The Q-learning algorithm is known to be affected by the maximization bias,\ni.e. the systematic overestimation of action values, an important issue that\nhas recently received renewed attention. Double Q-learning has been proposed as\nan efficient algorithm to mitigate this bias. However, this comes at the price\nof an underestimation of action values, in addition to increased memory\nrequirements and a slower convergence. In this paper, we introduce a new way to\naddress the maximization bias in the form of a "self-correcting algorithm" for\napproximating the maximum of an expected value. Our method balances the\noverestimation of the single estimator used in conventional Q-learning and the\nunderestimation of the double estimator used in Double Q-learning. Applying\nthis strategy to Q-learning results in Self-correcting Q-learning. We show\ntheoretically that this new algorithm enjoys the same convergence guarantees as\nQ-learning while being more accurate. Empirically, it performs better than\nDouble Q-learning in domains with rewards of high variance, and it even attains\nfaster convergence than Q-learning in domains with rewards of zero or low\nvariance. These advantages transfer to a Deep Q Network implementation that we\ncall Self-correcting DQN and which outperforms regular DQN and Double DQN on\nseveral tasks in the Atari 2600 domain.\n', '1703.02102': ' Off-policy stochastic actor-critic methods rely on approximating the\nstochastic policy gradient in order to derive an optimal policy. One may also\nderive the optimal policy by approximating the action-value gradient. The use\nof action-value gradients is desirable as policy improvement occurs along the\ndirection of steepest ascent. 
This has been studied extensively within the\ncontext of natural gradient actor-critic algorithms and more recently within\nthe context of deterministic policy gradients. In this paper we briefly discuss\nthe off-policy stochastic counterpart to deterministic action-value gradients,\nas well as an incremental approach for following the policy gradient in lieu of\nthe natural gradient.\n', '2209.01820': ' Traditional policy gradient methods are fundamentally flawed. Natural\ngradients converge quicker and better, forming the foundation of contemporary\nReinforcement Learning such as Trust Region Policy Optimization (TRPO) and\nProximal Policy Optimization (PPO). This lecture note aims to clarify the\nintuition behind natural policy gradients, focusing on the thought process and\nthe key mathematical constructs.\n', '1811.09013': ' Policy gradient methods are widely used for control in reinforcement\nlearning, particularly for the continuous action setting. There have been a\nhost of theoretically sound algorithms proposed for the on-policy setting, due\nto the existence of the policy gradient theorem which provides a simplified\nform for the gradient. In off-policy learning, however, where the behaviour\npolicy is not necessarily attempting to learn and follow the optimal policy for\nthe given task, the existence of such a theorem has been elusive. In this work,\nwe solve this open problem by providing the first off-policy policy gradient\ntheorem. The key to the derivation is the use of $emphatic$ $weightings$. We\ndevelop a new actor-critic algorithm$\\unicode{x2014}$called Actor Critic with\nEmphatic weightings (ACE)$\\unicode{x2014}$that approximates the simplified\ngradients provided by the theorem. We demonstrate in a simple counterexample\nthat previous off-policy policy gradient methods$\\unicode{x2014}$particularly\nOffPAC and DPG$\\unicode{x2014}$converge to the wrong solution whereas ACE finds\nthe optimal solution.\n', '1911.04817': ' The goal of policy gradient approaches is to find a policy in a given class\nof policies which maximizes the expected return. Given a differentiable model\nof the policy, we want to apply a gradient-ascent technique to reach a local\noptimum. We mainly use gradient ascent, because it is theoretically well\nresearched. The main issue is that the policy gradient with respect to the\nexpected return is not available, thus we need to estimate it. As policy\ngradient algorithms also tend to require on-policy data for the gradient\nestimate, their biggest weakness is sample efficiency. For this reason, most\nresearch is focused on finding algorithms with improved sample efficiency. This\npaper provides a formal introduction to policy gradient that shows the\ndevelopment of policy gradient approaches, and should enable the reader to\nfollow current research on the topic.\n', '1709.05067': ' Deep reinforcement learning is revolutionizing the artificial intelligence\nfield. Currently, it serves as a good starting point for constructing\nintelligent autonomous systems which offer a better knowledge of the visual\nworld. It is possible to scale deep reinforcement learning with the use of deep\nlearning and do amazing tasks such as use of pixels in playing video games. In\nthis paper, key concepts of deep reinforcement learning including reward\nfunction, differences between reinforcement learning and supervised learning\nand models for implementation of reinforcement are discussed. 
Key challenges\nrelated to the implementation of reinforcement learning in conversational AI\ndomain are identified as well as discussed in detail. Various conversational\nmodels which are based on deep reinforcement learning (as well as deep\nlearning) are also discussed. In summary, this paper discusses key aspects of\ndeep reinforcement learning which are crucial for designing an efficient\nconversational AI.\n', '1009.2998': ' The boundedness tests for the number of compact integral manifolds of\nautonomous ordinary differential systems, of autonomous total differential\nsystems, of linear systems of partial differential equations, of Pfaff systems\nof equations, and of systems of exterior differential equations are proved.\n', '1911.09048': ' This thesis (defended 10/07/2019) develops a theory of networks of hybrid\nopen systems and morphisms. It builds upon a framework of networks of\ncontinuous-time open systems as product and interconnection. We work out\ncategorical notions for hybrid systems, deterministic hybrid systems, hybrid\nopen systems, networks of hybrid open systems, and morphisms of networks of\nhybrid open systems.\n We also develop categorical notions for abstract systems, abstract open\nsystems, networks of abstract open systems, and morphisms of networks of\nabstract open systems. We show that a collection of relations holding among\npairs of systems induces a relation between interconnected systems. We use this\nresult for abstract systems to prove a corresponding result for networks of\nhybrid systems.\n This result translates as saying that our procedure for building networks\npreserves morphisms of open systems: a collection of morphisms of (sub)systems\nis sent to a morphism of networked systems. We thus both justify our formalism\nand concretize the intuition that a network is a collection of systems pieced\ntogether in a certain way.\n'}
71
- Every time you use information from the references, you need to cite its id after the sentence; for example, the sentence where you use information from 1905.09788 \cite{1905.09788}. Please avoid citing the same reference in the same paragraph.
72
- Put your response (do not include \section{...}) in the following Python script:with open("backgrounds.tex", "w") as f: f.write(r'''your_response''')
73
- INFO:utils.gpt_interaction:Reinforcement Learning (RL) is a learning paradigm for solving sequential decision-making problems, where an agent learns to make decisions by interacting with an environment and receiving feedback in the form of rewards or penalties \cite{2001.09608}. The central problem in RL is to find an optimal policy, which is a mapping from states to actions, that maximizes the expected cumulative reward over time.
74
-
75
- One of the foundational theories in RL is the concept of Markov Decision Processes (MDPs), which provide a mathematical framework for modeling decision-making problems. An MDP is defined as a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{2108.11510}. The objective in an MDP is to find a policy $\pi$ that maximizes the expected return $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $t$ is the current time step and $\gamma \in [0, 1]$ is the discount factor that determines the importance of future rewards.
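A quick worked illustration of the return defined above (the reward sequence and discount factor are assumed for illustration only): the backward recursion G_t = R_{t+1} + gamma * G_{t+1} reproduces the truncated discounted sum.

def discounted_return(rewards, gamma=0.9):
    # Backward recursion G_t = r_{t+1} + gamma * G_{t+1}, equivalent to
    # sum_k gamma**k * rewards[k] for a finite episode.
    g = 0.0
    for r in reversed(rewards):
        g = r + gamma * g
    return g

print(discounted_return([1.0, 0.0, 0.0, 10.0]))  # 1 + 0.9**3 * 10 = 8.29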
76
-
77
- Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected return when taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by:
78
- \begin{equation}
79
- Q(s, a) \leftarrow Q(s, a) + \alpha [R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a)],
80
- \end{equation}
81
- where $\alpha$ is the learning rate, $s'$ is the next state, and $a'$ is an action in state $s'$ \cite{2106.01134}.
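A minimal tabular sketch of this update rule; the env.reset()/env.step() interface follows the classic Gym convention with integer-indexed states, which is an assumption for illustration rather than something prescribed by the cited papers.

import numpy as np

def q_learning(env, n_states, n_actions, episodes=500,
               alpha=0.1, gamma=0.99, epsilon=0.1):
    Q = np.zeros((n_states, n_actions))
    for _ in range(episodes):
        s, done = env.reset(), False
        while not done:
            # Epsilon-greedy behaviour policy over the current Q estimates.
            a = np.random.randint(n_actions) if np.random.rand() < epsilon else int(Q[s].argmax())
            s_next, r, done, _ = env.step(a)  # classic Gym step signature (assumed)
            # Q(s,a) <- Q(s,a) + alpha * [r + gamma * max_a' Q(s',a') - Q(s,a)]
            target = r + (0.0 if done else gamma * Q[s_next].max())
            Q[s, a] += alpha * (target - Q[s, a])
            s = s_next
    return Q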
82
-
83
- Deep Reinforcement Learning (DRL) is an extension of RL that employs deep neural networks as function approximators for the value function or policy \cite{2108.11510}. DRL has demonstrated remarkable success in various domains, including finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. However, DRL is known to suffer from data inefficiency due to its trial-and-error learning mechanism, and several methods have been proposed to improve sample efficiency, such as environment modeling, experience transfer, and distributed modifications \cite{2212.00253}.
84
-
85
- Policy gradient methods are another class of RL algorithms that directly optimize the policy by following the gradient of the expected return with respect to the policy parameters \cite{1911.04817}. The policy gradient theorem provides a simplified form for the gradient, which has been widely used in on-policy learning algorithms \cite{1703.02102}. Off-policy learning, where the behavior policy is not necessarily attempting to learn and follow the optimal policy for the given task, has been a challenging area of research, and recent work has proposed the first off-policy policy gradient theorem using emphatic weightings \cite{1811.09013}.
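A minimal sketch of the score-function (REINFORCE-style) gradient estimate underlying these methods, using a softmax policy on a one-state toy problem; the reward means, step size, and iteration count are assumptions for illustration.

import numpy as np

rng = np.random.default_rng(0)
true_means = np.array([0.0, 1.0, 0.5])   # assumed toy problem: action 1 is best
theta = np.zeros(3)                      # policy parameters (softmax logits)

def softmax(x):
    z = np.exp(x - x.max())
    return z / z.sum()

for _ in range(2000):
    pi = softmax(theta)
    a = rng.choice(len(pi), p=pi)
    g = rng.normal(true_means[a], 1.0)   # sampled return G for the chosen action
    grad_log_pi = -pi
    grad_log_pi[a] += 1.0                # d/dtheta log pi(a) = one_hot(a) - pi
    theta += 0.05 * g * grad_log_pi      # ascend the estimated policy gradient

print(softmax(theta))                    # probability mass typically concentrates on action 1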
86
-
87
- In summary, Reinforcement Learning aims to solve sequential decision-making problems by finding an optimal policy that maximizes the expected cumulative reward over time. Foundational theories and algorithms such as MDPs, Q-learning, DRL, and policy gradient methods provide the basis for RL research and applications in various domains \cite{2001.09608, 2108.11510}.
88
- INFO:utils.gpt_interaction:Responses are not put in Python codes. Directly return assistant_message.
89
-
90
- INFO:utils.gpt_interaction:assistant_message: Reinforcement Learning (RL) is a learning paradigm for solving sequential decision-making problems, where an agent learns to make decisions by interacting with an environment and receiving feedback in the form of rewards or penalties \cite{2001.09608}. The central problem in RL is to find an optimal policy, which is a mapping from states to actions, that maximizes the expected cumulative reward over time.
91
-
92
- One of the foundational theories in RL is the concept of Markov Decision Processes (MDPs), which provide a mathematical framework for modeling decision-making problems. An MDP is defined as a tuple $(S, A, P, R, \gamma)$, where $S$ is the set of states, $A$ is the set of actions, $P$ is the state transition probability function, $R$ is the reward function, and $\gamma$ is the discount factor \cite{2108.11510}. The objective in an MDP is to find a policy $\pi$ that maximizes the expected return $G_t = \sum_{k=0}^{\infty} \gamma^k R_{t+k+1}$, where $t$ is the current time step and $\gamma \in [0, 1]$ is the discount factor that determines the importance of future rewards.
93
-
94
- Q-learning is a popular model-free RL algorithm that estimates the action-value function $Q(s, a)$, which represents the expected return when taking action $a$ in state $s$ and following the optimal policy thereafter \cite{2303.08631}. The Q-learning update rule is given by:
95
- \begin{equation}
96
- Q(s, a) \leftarrow Q(s, a) + \alpha [R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a)],
97
- \end{equation}
98
- where $\alpha$ is the learning rate, $s'$ is the next state, and $a'$ is an action in state $s'$ \cite{2106.01134}.
99
-
100
- Deep Reinforcement Learning (DRL) is an extension of RL that employs deep neural networks as function approximators for the value function or policy \cite{2108.11510}. DRL has demonstrated remarkable success in various domains, including finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. However, DRL is known to suffer from data inefficiency due to its trial-and-error learning mechanism, and several methods have been proposed to improve sample efficiency, such as environment modeling, experience transfer, and distributed modifications \cite{2212.00253}.
101
-
102
- Policy gradient methods are another class of RL algorithms that directly optimize the policy by following the gradient of the expected return with respect to the policy parameters \cite{1911.04817}. The policy gradient theorem provides a simplified form for the gradient, which has been widely used in on-policy learning algorithms \cite{1703.02102}. Off-policy learning, where the behavior policy is not necessarily attempting to learn and follow the optimal policy for the given task, has been a challenging area of research, and recent work has proposed the first off-policy policy gradient theorem using emphatic weightings \cite{1811.09013}.
103
-
104
- In summary, Reinforcement Learning aims to solve sequential decision-making problems by finding an optimal policy that maximizes the expected cumulative reward over time. Foundational theories and algorithms such as MDPs, Q-learning, DRL, and policy gradient methods provide the basis for RL research and applications in various domains \cite{2001.09608, 2108.11510}.
105
- INFO:root:For generating backgrounds, 4021 tokens have been used (3275 for prompts; 746 for completion). 11967 tokens have been used in total.
 
outputs/outputs_20230421_012207/iclr2022_conference.bst DELETED
@@ -1,1440 +0,0 @@
1
- %% File: `iclr2017.bst'
2
- %% A copy of iclm2010.bst, which is a modification of `plainnl.bst' for use with natbib package
3
- %%
4
- %% Copyright 2010 Hal Daum\'e III
5
- %% Modified by J. Fürnkranz
6
- %% - Changed labels from (X and Y, 2000) to (X & Y, 2000)
7
- %%
8
- %% Copyright 1993-2007 Patrick W Daly
9
- %% Max-Planck-Institut f\"ur Sonnensystemforschung
10
- %% Max-Planck-Str. 2
11
- %% D-37191 Katlenburg-Lindau
12
- %% Germany
13
- %% E-mail: daly@mps.mpg.de
14
- %%
15
- %% This program can be redistributed and/or modified under the terms
16
- %% of the LaTeX Project Public License Distributed from CTAN
17
- %% archives in directory macros/latex/base/lppl.txt; either
18
- %% version 1 of the License, or any later version.
19
- %%
20
- % Version and source file information:
21
- % \ProvidesFile{icml2010.mbs}[2007/11/26 1.93 (PWD)]
22
- %
23
- % BibTeX `plainnat' family
24
- % version 0.99b for BibTeX versions 0.99a or later,
25
- % for LaTeX versions 2.09 and 2e.
26
- %
27
- % For use with the `natbib.sty' package; emulates the corresponding
28
- % member of the `plain' family, but with author-year citations.
29
- %
30
- % With version 6.0 of `natbib.sty', it may also be used for numerical
31
- % citations, while retaining the commands \citeauthor, \citefullauthor,
32
- % and \citeyear to print the corresponding information.
33
- %
34
- % For version 7.0 of `natbib.sty', the KEY field replaces missing
35
- % authors/editors, and the date is left blank in \bibitem.
36
- %
37
- % Includes field EID for the sequence/citation number of electronic journals
38
- % which is used instead of page numbers.
39
- %
40
- % Includes fields ISBN and ISSN.
41
- %
42
- % Includes field URL for Internet addresses.
43
- %
44
- % Includes field DOI for Digital Object Idenfifiers.
45
- %
46
- % Works best with the url.sty package of Donald Arseneau.
47
- %
48
- % Works with identical authors and year are further sorted by
49
- % citation key, to preserve any natural sequence.
50
- %
51
- ENTRY
52
- { address
53
- author
54
- booktitle
55
- chapter
56
- doi
57
- eid
58
- edition
59
- editor
60
- howpublished
61
- institution
62
- isbn
63
- issn
64
- journal
65
- key
66
- month
67
- note
68
- number
69
- organization
70
- pages
71
- publisher
72
- school
73
- series
74
- title
75
- type
76
- url
77
- volume
78
- year
79
- }
80
- {}
81
- { label extra.label sort.label short.list }
82
-
83
- INTEGERS { output.state before.all mid.sentence after.sentence after.block }
84
-
85
- FUNCTION {init.state.consts}
86
- { #0 'before.all :=
87
- #1 'mid.sentence :=
88
- #2 'after.sentence :=
89
- #3 'after.block :=
90
- }
91
-
92
- STRINGS { s t }
93
-
94
- FUNCTION {output.nonnull}
95
- { 's :=
96
- output.state mid.sentence =
97
- { ", " * write$ }
98
- { output.state after.block =
99
- { add.period$ write$
100
- newline$
101
- "\newblock " write$
102
- }
103
- { output.state before.all =
104
- 'write$
105
- { add.period$ " " * write$ }
106
- if$
107
- }
108
- if$
109
- mid.sentence 'output.state :=
110
- }
111
- if$
112
- s
113
- }
114
-
115
- FUNCTION {output}
116
- { duplicate$ empty$
117
- 'pop$
118
- 'output.nonnull
119
- if$
120
- }
121
-
122
- FUNCTION {output.check}
123
- { 't :=
124
- duplicate$ empty$
125
- { pop$ "empty " t * " in " * cite$ * warning$ }
126
- 'output.nonnull
127
- if$
128
- }
129
-
130
- FUNCTION {fin.entry}
131
- { add.period$
132
- write$
133
- newline$
134
- }
135
-
136
- FUNCTION {new.block}
137
- { output.state before.all =
138
- 'skip$
139
- { after.block 'output.state := }
140
- if$
141
- }
142
-
143
- FUNCTION {new.sentence}
144
- { output.state after.block =
145
- 'skip$
146
- { output.state before.all =
147
- 'skip$
148
- { after.sentence 'output.state := }
149
- if$
150
- }
151
- if$
152
- }
153
-
154
- FUNCTION {not}
155
- { { #0 }
156
- { #1 }
157
- if$
158
- }
159
-
160
- FUNCTION {and}
161
- { 'skip$
162
- { pop$ #0 }
163
- if$
164
- }
165
-
166
- FUNCTION {or}
167
- { { pop$ #1 }
168
- 'skip$
169
- if$
170
- }
171
-
172
- FUNCTION {new.block.checka}
173
- { empty$
174
- 'skip$
175
- 'new.block
176
- if$
177
- }
178
-
179
- FUNCTION {new.block.checkb}
180
- { empty$
181
- swap$ empty$
182
- and
183
- 'skip$
184
- 'new.block
185
- if$
186
- }
187
-
188
- FUNCTION {new.sentence.checka}
189
- { empty$
190
- 'skip$
191
- 'new.sentence
192
- if$
193
- }
194
-
195
- FUNCTION {new.sentence.checkb}
196
- { empty$
197
- swap$ empty$
198
- and
199
- 'skip$
200
- 'new.sentence
201
- if$
202
- }
203
-
204
- FUNCTION {field.or.null}
205
- { duplicate$ empty$
206
- { pop$ "" }
207
- 'skip$
208
- if$
209
- }
210
-
211
- FUNCTION {emphasize}
212
- { duplicate$ empty$
213
- { pop$ "" }
214
- { "\emph{" swap$ * "}" * }
215
- if$
216
- }
217
-
218
- INTEGERS { nameptr namesleft numnames }
219
-
220
- FUNCTION {format.names}
221
- { 's :=
222
- #1 'nameptr :=
223
- s num.names$ 'numnames :=
224
- numnames 'namesleft :=
225
- { namesleft #0 > }
226
- { s nameptr "{ff~}{vv~}{ll}{, jj}" format.name$ 't :=
227
- nameptr #1 >
228
- { namesleft #1 >
229
- { ", " * t * }
230
- { numnames #2 >
231
- { "," * }
232
- 'skip$
233
- if$
234
- t "others" =
235
- { " et~al." * }
236
- { " and " * t * }
237
- if$
238
- }
239
- if$
240
- }
241
- 't
242
- if$
243
- nameptr #1 + 'nameptr :=
244
- namesleft #1 - 'namesleft :=
245
- }
246
- while$
247
- }
248
-
249
- FUNCTION {format.key}
250
- { empty$
251
- { key field.or.null }
252
- { "" }
253
- if$
254
- }
255
-
256
- FUNCTION {format.authors}
257
- { author empty$
258
- { "" }
259
- { author format.names }
260
- if$
261
- }
262
-
263
- FUNCTION {format.editors}
264
- { editor empty$
265
- { "" }
266
- { editor format.names
267
- editor num.names$ #1 >
268
- { " (eds.)" * }
269
- { " (ed.)" * }
270
- if$
271
- }
272
- if$
273
- }
274
-
275
- FUNCTION {format.isbn}
276
- { isbn empty$
277
- { "" }
278
- { new.block "ISBN " isbn * }
279
- if$
280
- }
281
-
282
- FUNCTION {format.issn}
283
- { issn empty$
284
- { "" }
285
- { new.block "ISSN " issn * }
286
- if$
287
- }
288
-
289
- FUNCTION {format.url}
290
- { url empty$
291
- { "" }
292
- { new.block "URL \url{" url * "}" * }
293
- if$
294
- }
295
-
296
- FUNCTION {format.doi}
297
- { doi empty$
298
- { "" }
299
- { new.block "\doi{" doi * "}" * }
300
- if$
301
- }
302
-
303
- FUNCTION {format.title}
304
- { title empty$
305
- { "" }
306
- { title "t" change.case$ }
307
- if$
308
- }
309
-
310
- FUNCTION {format.full.names}
311
- {'s :=
312
- #1 'nameptr :=
313
- s num.names$ 'numnames :=
314
- numnames 'namesleft :=
315
- { namesleft #0 > }
316
- { s nameptr
317
- "{vv~}{ll}" format.name$ 't :=
318
- nameptr #1 >
319
- {
320
- namesleft #1 >
321
- { ", " * t * }
322
- {
323
- numnames #2 >
324
- { "," * }
325
- 'skip$
326
- if$
327
- t "others" =
328
- { " et~al." * }
329
- { " and " * t * }
330
- if$
331
- }
332
- if$
333
- }
334
- 't
335
- if$
336
- nameptr #1 + 'nameptr :=
337
- namesleft #1 - 'namesleft :=
338
- }
339
- while$
340
- }
341
-
342
- FUNCTION {author.editor.full}
343
- { author empty$
344
- { editor empty$
345
- { "" }
346
- { editor format.full.names }
347
- if$
348
- }
349
- { author format.full.names }
350
- if$
351
- }
352
-
353
- FUNCTION {author.full}
354
- { author empty$
355
- { "" }
356
- { author format.full.names }
357
- if$
358
- }
359
-
360
- FUNCTION {editor.full}
361
- { editor empty$
362
- { "" }
363
- { editor format.full.names }
364
- if$
365
- }
366
-
367
- FUNCTION {make.full.names}
368
- { type$ "book" =
369
- type$ "inbook" =
370
- or
371
- 'author.editor.full
372
- { type$ "proceedings" =
373
- 'editor.full
374
- 'author.full
375
- if$
376
- }
377
- if$
378
- }
379
-
380
- FUNCTION {output.bibitem}
381
- { newline$
382
- "\bibitem[" write$
383
- label write$
384
- ")" make.full.names duplicate$ short.list =
385
- { pop$ }
386
- { * }
387
- if$
388
- "]{" * write$
389
- cite$ write$
390
- "}" write$
391
- newline$
392
- ""
393
- before.all 'output.state :=
394
- }
395
-
396
- FUNCTION {n.dashify}
397
- { 't :=
398
- ""
399
- { t empty$ not }
400
- { t #1 #1 substring$ "-" =
401
- { t #1 #2 substring$ "--" = not
402
- { "--" *
403
- t #2 global.max$ substring$ 't :=
404
- }
405
- { { t #1 #1 substring$ "-" = }
406
- { "-" *
407
- t #2 global.max$ substring$ 't :=
408
- }
409
- while$
410
- }
411
- if$
412
- }
413
- { t #1 #1 substring$ *
414
- t #2 global.max$ substring$ 't :=
415
- }
416
- if$
417
- }
418
- while$
419
- }
420
-
421
- FUNCTION {format.date}
422
- { year duplicate$ empty$
423
- { "empty year in " cite$ * warning$
424
- pop$ "" }
425
- 'skip$
426
- if$
427
- month empty$
428
- 'skip$
429
- { month
430
- " " * swap$ *
431
- }
432
- if$
433
- extra.label *
434
- }
435
-
436
- FUNCTION {format.btitle}
437
- { title emphasize
438
- }
439
-
440
- FUNCTION {tie.or.space.connect}
441
- { duplicate$ text.length$ #3 <
442
- { "~" }
443
- { " " }
444
- if$
445
- swap$ * *
446
- }
447
-
448
- FUNCTION {either.or.check}
449
- { empty$
450
- 'pop$
451
- { "can't use both " swap$ * " fields in " * cite$ * warning$ }
452
- if$
453
- }
454
-
455
- FUNCTION {format.bvolume}
456
- { volume empty$
457
- { "" }
458
- { "volume" volume tie.or.space.connect
459
- series empty$
460
- 'skip$
461
- { " of " * series emphasize * }
462
- if$
463
- "volume and number" number either.or.check
464
- }
465
- if$
466
- }
467
-
468
- FUNCTION {format.number.series}
469
- { volume empty$
470
- { number empty$
471
- { series field.or.null }
472
- { output.state mid.sentence =
473
- { "number" }
474
- { "Number" }
475
- if$
476
- number tie.or.space.connect
477
- series empty$
478
- { "there's a number but no series in " cite$ * warning$ }
479
- { " in " * series * }
480
- if$
481
- }
482
- if$
483
- }
484
- { "" }
485
- if$
486
- }
487
-
488
- FUNCTION {format.edition}
489
- { edition empty$
490
- { "" }
491
- { output.state mid.sentence =
492
- { edition "l" change.case$ " edition" * }
493
- { edition "t" change.case$ " edition" * }
494
- if$
495
- }
496
- if$
497
- }
498
-
499
- INTEGERS { multiresult }
500
-
501
- FUNCTION {multi.page.check}
502
- { 't :=
503
- #0 'multiresult :=
504
- { multiresult not
505
- t empty$ not
506
- and
507
- }
508
- { t #1 #1 substring$
509
- duplicate$ "-" =
510
- swap$ duplicate$ "," =
511
- swap$ "+" =
512
- or or
513
- { #1 'multiresult := }
514
- { t #2 global.max$ substring$ 't := }
515
- if$
516
- }
517
- while$
518
- multiresult
519
- }
520
-
521
- FUNCTION {format.pages}
522
- { pages empty$
523
- { "" }
524
- { pages multi.page.check
525
- { "pp.\ " pages n.dashify tie.or.space.connect }
526
- { "pp.\ " pages tie.or.space.connect }
527
- if$
528
- }
529
- if$
530
- }
531
-
532
- FUNCTION {format.eid}
533
- { eid empty$
534
- { "" }
535
- { "art." eid tie.or.space.connect }
536
- if$
537
- }
538
-
539
- FUNCTION {format.vol.num.pages}
540
- { volume field.or.null
541
- number empty$
542
- 'skip$
543
- { "\penalty0 (" number * ")" * *
544
- volume empty$
545
- { "there's a number but no volume in " cite$ * warning$ }
546
- 'skip$
547
- if$
548
- }
549
- if$
550
- pages empty$
551
- 'skip$
552
- { duplicate$ empty$
553
- { pop$ format.pages }
554
- { ":\penalty0 " * pages n.dashify * }
555
- if$
556
- }
557
- if$
558
- }
559
-
560
- FUNCTION {format.vol.num.eid}
561
- { volume field.or.null
562
- number empty$
563
- 'skip$
564
- { "\penalty0 (" number * ")" * *
565
- volume empty$
566
- { "there's a number but no volume in " cite$ * warning$ }
567
- 'skip$
568
- if$
569
- }
570
- if$
571
- eid empty$
572
- 'skip$
573
- { duplicate$ empty$
574
- { pop$ format.eid }
575
- { ":\penalty0 " * eid * }
576
- if$
577
- }
578
- if$
579
- }
580
-
581
- FUNCTION {format.chapter.pages}
582
- { chapter empty$
583
- 'format.pages
584
- { type empty$
585
- { "chapter" }
586
- { type "l" change.case$ }
587
- if$
588
- chapter tie.or.space.connect
589
- pages empty$
590
- 'skip$
591
- { ", " * format.pages * }
592
- if$
593
- }
594
- if$
595
- }
596
-
597
- FUNCTION {format.in.ed.booktitle}
598
- { booktitle empty$
599
- { "" }
600
- { editor empty$
601
- { "In " booktitle emphasize * }
602
- { "In " format.editors * ", " * booktitle emphasize * }
603
- if$
604
- }
605
- if$
606
- }
607
-
608
- FUNCTION {empty.misc.check}
609
- { author empty$ title empty$ howpublished empty$
610
- month empty$ year empty$ note empty$
611
- and and and and and
612
- key empty$ not and
613
- { "all relevant fields are empty in " cite$ * warning$ }
614
- 'skip$
615
- if$
616
- }
617
-
618
- FUNCTION {format.thesis.type}
619
- { type empty$
620
- 'skip$
621
- { pop$
622
- type "t" change.case$
623
- }
624
- if$
625
- }
626
-
627
- FUNCTION {format.tr.number}
628
- { type empty$
629
- { "Technical Report" }
630
- 'type
631
- if$
632
- number empty$
633
- { "t" change.case$ }
634
- { number tie.or.space.connect }
635
- if$
636
- }
637
-
638
- FUNCTION {format.article.crossref}
639
- { key empty$
640
- { journal empty$
641
- { "need key or journal for " cite$ * " to crossref " * crossref *
642
- warning$
643
- ""
644
- }
645
- { "In \emph{" journal * "}" * }
646
- if$
647
- }
648
- { "In " }
649
- if$
650
- " \citet{" * crossref * "}" *
651
- }
652
-
653
- FUNCTION {format.book.crossref}
654
- { volume empty$
655
- { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
656
- "In "
657
- }
658
- { "Volume" volume tie.or.space.connect
659
- " of " *
660
- }
661
- if$
662
- editor empty$
663
- editor field.or.null author field.or.null =
664
- or
665
- { key empty$
666
- { series empty$
667
- { "need editor, key, or series for " cite$ * " to crossref " *
668
- crossref * warning$
669
- "" *
670
- }
671
- { "\emph{" * series * "}" * }
672
- if$
673
- }
674
- 'skip$
675
- if$
676
- }
677
- 'skip$
678
- if$
679
- " \citet{" * crossref * "}" *
680
- }
681
-
682
- FUNCTION {format.incoll.inproc.crossref}
683
- { editor empty$
684
- editor field.or.null author field.or.null =
685
- or
686
- { key empty$
687
- { booktitle empty$
688
- { "need editor, key, or booktitle for " cite$ * " to crossref " *
689
- crossref * warning$
690
- ""
691
- }
692
- { "In \emph{" booktitle * "}" * }
693
- if$
694
- }
695
- { "In " }
696
- if$
697
- }
698
- { "In " }
699
- if$
700
- " \citet{" * crossref * "}" *
701
- }
702
-
703
- FUNCTION {article}
704
- { output.bibitem
705
- format.authors "author" output.check
706
- author format.key output
707
- new.block
708
- format.title "title" output.check
709
- new.block
710
- crossref missing$
711
- { journal emphasize "journal" output.check
712
- eid empty$
713
- { format.vol.num.pages output }
714
- { format.vol.num.eid output }
715
- if$
716
- format.date "year" output.check
717
- }
718
- { format.article.crossref output.nonnull
719
- eid empty$
720
- { format.pages output }
721
- { format.eid output }
722
- if$
723
- }
724
- if$
725
- format.issn output
726
- format.doi output
727
- format.url output
728
- new.block
729
- note output
730
- fin.entry
731
- }
732
-
733
- FUNCTION {book}
734
- { output.bibitem
735
- author empty$
736
- { format.editors "author and editor" output.check
737
- editor format.key output
738
- }
739
- { format.authors output.nonnull
740
- crossref missing$
741
- { "author and editor" editor either.or.check }
742
- 'skip$
743
- if$
744
- }
745
- if$
746
- new.block
747
- format.btitle "title" output.check
748
- crossref missing$
749
- { format.bvolume output
750
- new.block
751
- format.number.series output
752
- new.sentence
753
- publisher "publisher" output.check
754
- address output
755
- }
756
- { new.block
757
- format.book.crossref output.nonnull
758
- }
759
- if$
760
- format.edition output
761
- format.date "year" output.check
762
- format.isbn output
763
- format.doi output
764
- format.url output
765
- new.block
766
- note output
767
- fin.entry
768
- }
769
-
770
- FUNCTION {booklet}
771
- { output.bibitem
772
- format.authors output
773
- author format.key output
774
- new.block
775
- format.title "title" output.check
776
- howpublished address new.block.checkb
777
- howpublished output
778
- address output
779
- format.date output
780
- format.isbn output
781
- format.doi output
782
- format.url output
783
- new.block
784
- note output
785
- fin.entry
786
- }
787
-
788
- FUNCTION {inbook}
789
- { output.bibitem
790
- author empty$
791
- { format.editors "author and editor" output.check
792
- editor format.key output
793
- }
794
- { format.authors output.nonnull
795
- crossref missing$
796
- { "author and editor" editor either.or.check }
797
- 'skip$
798
- if$
799
- }
800
- if$
801
- new.block
802
- format.btitle "title" output.check
803
- crossref missing$
804
- { format.bvolume output
805
- format.chapter.pages "chapter and pages" output.check
806
- new.block
807
- format.number.series output
808
- new.sentence
809
- publisher "publisher" output.check
810
- address output
811
- }
812
- { format.chapter.pages "chapter and pages" output.check
813
- new.block
814
- format.book.crossref output.nonnull
815
- }
816
- if$
817
- format.edition output
818
- format.date "year" output.check
819
- format.isbn output
820
- format.doi output
821
- format.url output
822
- new.block
823
- note output
824
- fin.entry
825
- }
826
-
827
- FUNCTION {incollection}
828
- { output.bibitem
829
- format.authors "author" output.check
830
- author format.key output
831
- new.block
832
- format.title "title" output.check
833
- new.block
834
- crossref missing$
835
- { format.in.ed.booktitle "booktitle" output.check
836
- format.bvolume output
837
- format.number.series output
838
- format.chapter.pages output
839
- new.sentence
840
- publisher "publisher" output.check
841
- address output
842
- format.edition output
843
- format.date "year" output.check
844
- }
845
- { format.incoll.inproc.crossref output.nonnull
846
- format.chapter.pages output
847
- }
848
- if$
849
- format.isbn output
850
- format.doi output
851
- format.url output
852
- new.block
853
- note output
854
- fin.entry
855
- }
856
-
857
- FUNCTION {inproceedings}
858
- { output.bibitem
859
- format.authors "author" output.check
860
- author format.key output
861
- new.block
862
- format.title "title" output.check
863
- new.block
864
- crossref missing$
865
- { format.in.ed.booktitle "booktitle" output.check
866
- format.bvolume output
867
- format.number.series output
868
- format.pages output
869
- address empty$
870
- { organization publisher new.sentence.checkb
871
- organization output
872
- publisher output
873
- format.date "year" output.check
874
- }
875
- { address output.nonnull
876
- format.date "year" output.check
877
- new.sentence
878
- organization output
879
- publisher output
880
- }
881
- if$
882
- }
883
- { format.incoll.inproc.crossref output.nonnull
884
- format.pages output
885
- }
886
- if$
887
- format.isbn output
888
- format.doi output
889
- format.url output
890
- new.block
891
- note output
892
- fin.entry
893
- }
894
-
895
- FUNCTION {conference} { inproceedings }
896
-
897
- FUNCTION {manual}
898
- { output.bibitem
899
- format.authors output
900
- author format.key output
901
- new.block
902
- format.btitle "title" output.check
903
- organization address new.block.checkb
904
- organization output
905
- address output
906
- format.edition output
907
- format.date output
908
- format.url output
909
- new.block
910
- note output
911
- fin.entry
912
- }
913
-
914
- FUNCTION {mastersthesis}
915
- { output.bibitem
916
- format.authors "author" output.check
917
- author format.key output
918
- new.block
919
- format.title "title" output.check
920
- new.block
921
- "Master's thesis" format.thesis.type output.nonnull
922
- school "school" output.check
923
- address output
924
- format.date "year" output.check
925
- format.url output
926
- new.block
927
- note output
928
- fin.entry
929
- }
930
-
931
- FUNCTION {misc}
932
- { output.bibitem
933
- format.authors output
934
- author format.key output
935
- title howpublished new.block.checkb
936
- format.title output
937
- howpublished new.block.checka
938
- howpublished output
939
- format.date output
940
- format.issn output
941
- format.url output
942
- new.block
943
- note output
944
- fin.entry
945
- empty.misc.check
946
- }
947
-
948
- FUNCTION {phdthesis}
949
- { output.bibitem
950
- format.authors "author" output.check
951
- author format.key output
952
- new.block
953
- format.btitle "title" output.check
954
- new.block
955
- "PhD thesis" format.thesis.type output.nonnull
956
- school "school" output.check
957
- address output
958
- format.date "year" output.check
959
- format.url output
960
- new.block
961
- note output
962
- fin.entry
963
- }
964
-
965
- FUNCTION {proceedings}
966
- { output.bibitem
967
- format.editors output
968
- editor format.key output
969
- new.block
970
- format.btitle "title" output.check
971
- format.bvolume output
972
- format.number.series output
973
- address output
974
- format.date "year" output.check
975
- new.sentence
976
- organization output
977
- publisher output
978
- format.isbn output
979
- format.doi output
980
- format.url output
981
- new.block
982
- note output
983
- fin.entry
984
- }
985
-
986
- FUNCTION {techreport}
987
- { output.bibitem
988
- format.authors "author" output.check
989
- author format.key output
990
- new.block
991
- format.title "title" output.check
992
- new.block
993
- format.tr.number output.nonnull
994
- institution "institution" output.check
995
- address output
996
- format.date "year" output.check
997
- format.url output
998
- new.block
999
- note output
1000
- fin.entry
1001
- }
1002
-
1003
- FUNCTION {unpublished}
1004
- { output.bibitem
1005
- format.authors "author" output.check
1006
- author format.key output
1007
- new.block
1008
- format.title "title" output.check
1009
- new.block
1010
- note "note" output.check
1011
- format.date output
1012
- format.url output
1013
- fin.entry
1014
- }
1015
-
1016
- FUNCTION {default.type} { misc }
1017
-
1018
-
1019
- MACRO {jan} {"January"}
1020
-
1021
- MACRO {feb} {"February"}
1022
-
1023
- MACRO {mar} {"March"}
1024
-
1025
- MACRO {apr} {"April"}
1026
-
1027
- MACRO {may} {"May"}
1028
-
- MACRO {jun} {"June"}
-
- MACRO {jul} {"July"}
-
- MACRO {aug} {"August"}
-
- MACRO {sep} {"September"}
-
- MACRO {oct} {"October"}
-
- MACRO {nov} {"November"}
-
- MACRO {dec} {"December"}
-
-
-
- MACRO {acmcs} {"ACM Computing Surveys"}
-
- MACRO {acta} {"Acta Informatica"}
-
- MACRO {cacm} {"Communications of the ACM"}
-
- MACRO {ibmjrd} {"IBM Journal of Research and Development"}
-
- MACRO {ibmsj} {"IBM Systems Journal"}
-
- MACRO {ieeese} {"IEEE Transactions on Software Engineering"}
-
- MACRO {ieeetc} {"IEEE Transactions on Computers"}
-
- MACRO {ieeetcad}
- {"IEEE Transactions on Computer-Aided Design of Integrated Circuits"}
-
- MACRO {ipl} {"Information Processing Letters"}
-
- MACRO {jacm} {"Journal of the ACM"}
-
- MACRO {jcss} {"Journal of Computer and System Sciences"}
-
- MACRO {scp} {"Science of Computer Programming"}
-
- MACRO {sicomp} {"SIAM Journal on Computing"}
-
- MACRO {tocs} {"ACM Transactions on Computer Systems"}
-
- MACRO {tods} {"ACM Transactions on Database Systems"}
-
- MACRO {tog} {"ACM Transactions on Graphics"}
-
- MACRO {toms} {"ACM Transactions on Mathematical Software"}
-
- MACRO {toois} {"ACM Transactions on Office Information Systems"}
-
- MACRO {toplas} {"ACM Transactions on Programming Languages and Systems"}
-
- MACRO {tcs} {"Theoretical Computer Science"}
-
-
- READ
-
- FUNCTION {sortify}
- { purify$
-   "l" change.case$
- }
-
- INTEGERS { len }
-
- FUNCTION {chop.word}
- { 's :=
-   'len :=
-   s #1 len substring$ =
-     { s len #1 + global.max$ substring$ }
-     's
-   if$
- }
-
- FUNCTION {format.lab.names}
- { 's :=
-   s #1 "{vv~}{ll}" format.name$
-   s num.names$ duplicate$
-   #2 >
-     { pop$ " et~al." * }
-     { #2 <
-       'skip$
-       { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
-         { " et~al." * }
-         { " \& " * s #2 "{vv~}{ll}" format.name$ * }
-         if$
-       }
-       if$
-     }
-   if$
- }
-
- FUNCTION {author.key.label}
- { author empty$
-     { key empty$
-         { cite$ #1 #3 substring$ }
-         'key
-       if$
-     }
-     { author format.lab.names }
-   if$
- }
-
- FUNCTION {author.editor.key.label}
- { author empty$
-     { editor empty$
-         { key empty$
-             { cite$ #1 #3 substring$ }
-             'key
-           if$
-         }
-         { editor format.lab.names }
-       if$
-     }
-     { author format.lab.names }
-   if$
- }
-
- FUNCTION {author.key.organization.label}
- { author empty$
-     { key empty$
-         { organization empty$
-             { cite$ #1 #3 substring$ }
-             { "The " #4 organization chop.word #3 text.prefix$ }
-           if$
-         }
-         'key
-       if$
-     }
-     { author format.lab.names }
-   if$
- }
-
- FUNCTION {editor.key.organization.label}
- { editor empty$
-     { key empty$
-         { organization empty$
-             { cite$ #1 #3 substring$ }
-             { "The " #4 organization chop.word #3 text.prefix$ }
-           if$
-         }
-         'key
-       if$
-     }
-     { editor format.lab.names }
-   if$
- }
-
- FUNCTION {calc.short.authors}
- { type$ "book" =
-   type$ "inbook" =
-   or
-     'author.editor.key.label
-     { type$ "proceedings" =
-         'editor.key.organization.label
-         { type$ "manual" =
-             'author.key.organization.label
-             'author.key.label
-           if$
-         }
-       if$
-     }
-   if$
-   'short.list :=
- }
-
- FUNCTION {calc.label}
- { calc.short.authors
-   short.list
-   "("
-   *
-   year duplicate$ empty$
-   short.list key field.or.null = or
-     { pop$ "" }
-     'skip$
-   if$
-   *
-   'label :=
- }
-
- FUNCTION {sort.format.names}
- { 's :=
-   #1 'nameptr :=
-   ""
-   s num.names$ 'numnames :=
-   numnames 'namesleft :=
-     { namesleft #0 > }
-     {
-       s nameptr "{vv{ } }{ll{ }}{ ff{ }}{ jj{ }}" format.name$ 't :=
-       nameptr #1 >
-         {
-           " " *
-           namesleft #1 = t "others" = and
-             { "zzzzz" * }
-             { numnames #2 > nameptr #2 = and
-                 { "zz" * year field.or.null * " " * }
-                 'skip$
-               if$
-               t sortify *
-             }
-           if$
-         }
-         { t sortify * }
-       if$
-       nameptr #1 + 'nameptr :=
-       namesleft #1 - 'namesleft :=
-     }
-   while$
- }
-
- FUNCTION {sort.format.title}
- { 't :=
-   "A " #2
-   "An " #3
-   "The " #4 t chop.word
-   chop.word
-   chop.word
-   sortify
-   #1 global.max$ substring$
- }
-
- FUNCTION {author.sort}
- { author empty$
-     { key empty$
-         { "to sort, need author or key in " cite$ * warning$
-           ""
-         }
-         { key sortify }
-       if$
-     }
-     { author sort.format.names }
-   if$
- }
-
- FUNCTION {author.editor.sort}
- { author empty$
-     { editor empty$
-         { key empty$
-             { "to sort, need author, editor, or key in " cite$ * warning$
-               ""
-             }
-             { key sortify }
-           if$
-         }
-         { editor sort.format.names }
-       if$
-     }
-     { author sort.format.names }
-   if$
- }
-
- FUNCTION {author.organization.sort}
- { author empty$
-     { organization empty$
-         { key empty$
-             { "to sort, need author, organization, or key in " cite$ * warning$
-               ""
-             }
-             { key sortify }
-           if$
-         }
-         { "The " #4 organization chop.word sortify }
-       if$
-     }
-     { author sort.format.names }
-   if$
- }
-
- FUNCTION {editor.organization.sort}
- { editor empty$
-     { organization empty$
-         { key empty$
-             { "to sort, need editor, organization, or key in " cite$ * warning$
-               ""
-             }
-             { key sortify }
-           if$
-         }
-         { "The " #4 organization chop.word sortify }
-       if$
-     }
-     { editor sort.format.names }
-   if$
- }
-
-
- FUNCTION {presort}
- { calc.label
-   label sortify
-   " "
-   *
-   type$ "book" =
-   type$ "inbook" =
-   or
-     'author.editor.sort
-     { type$ "proceedings" =
-         'editor.organization.sort
-         { type$ "manual" =
-             'author.organization.sort
-             'author.sort
-           if$
-         }
-       if$
-     }
-   if$
-   " "
-   *
-   year field.or.null sortify
-   *
-   " "
-   *
-   cite$
-   *
-   #1 entry.max$ substring$
-   'sort.label :=
-   sort.label *
-   #1 entry.max$ substring$
-   'sort.key$ :=
- }
-
- ITERATE {presort}
-
- SORT
-
- STRINGS { longest.label last.label next.extra }
-
- INTEGERS { longest.label.width last.extra.num number.label }
-
- FUNCTION {initialize.longest.label}
- { "" 'longest.label :=
-   #0 int.to.chr$ 'last.label :=
-   "" 'next.extra :=
-   #0 'longest.label.width :=
-   #0 'last.extra.num :=
-   #0 'number.label :=
- }
-
- FUNCTION {forward.pass}
- { last.label label =
-     { last.extra.num #1 + 'last.extra.num :=
-       last.extra.num int.to.chr$ 'extra.label :=
-     }
-     { "a" chr.to.int$ 'last.extra.num :=
-       "" 'extra.label :=
-       label 'last.label :=
-     }
-   if$
-   number.label #1 + 'number.label :=
- }
-
- FUNCTION {reverse.pass}
- { next.extra "b" =
-     { "a" 'extra.label := }
-     'skip$
-   if$
-   extra.label 'next.extra :=
-   extra.label
-   duplicate$ empty$
-     'skip$
-     { "{\natexlab{" swap$ * "}}" * }
-   if$
-   'extra.label :=
-   label extra.label * 'label :=
- }
-
- EXECUTE {initialize.longest.label}
-
- ITERATE {forward.pass}
-
- REVERSE {reverse.pass}
-
- FUNCTION {bib.sort.order}
- { sort.label 'sort.key$ :=
- }
-
- ITERATE {bib.sort.order}
-
- SORT
-
- FUNCTION {begin.bib}
- { preamble$ empty$
-     'skip$
-     { preamble$ write$ newline$ }
-   if$
-   "\begin{thebibliography}{" number.label int.to.str$ * "}" *
-   write$ newline$
-   "\providecommand{\natexlab}[1]{#1}"
-   write$ newline$
-   "\providecommand{\url}[1]{\texttt{#1}}"
-   write$ newline$
-   "\expandafter\ifx\csname urlstyle\endcsname\relax"
-   write$ newline$
-   " \providecommand{\doi}[1]{doi: #1}\else"
-   write$ newline$
-   " \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi"
-   write$ newline$
- }
-
- EXECUTE {begin.bib}
-
- EXECUTE {init.state.consts}
-
- ITERATE {call.type$}
-
- FUNCTION {end.bib}
- { newline$
-   "\end{thebibliography}" write$ newline$
- }
-
- EXECUTE {end.bib}
outputs/outputs_20230421_012207/iclr2022_conference.sty DELETED
@@ -1,245 +0,0 @@
- %%%% ICLR Macros (LaTex)
- %%%% Adapted by Hugo Larochelle from the NIPS stylefile Macros
- %%%% Style File
- %%%% Dec 12, 1990 Rev Aug 14, 1991; Sept, 1995; April, 1997; April, 1999; October 2014
-
- % This file can be used with Latex2e whether running in main mode, or
- % 2.09 compatibility mode.
- %
- % If using main mode, you need to include the commands
- % \documentclass{article}
- % \usepackage{iclr14submit_e,times}
- %
-
- % Change the overall width of the page. If these parameters are
- % changed, they will require corresponding changes in the
- % maketitle section.
- %
- \usepackage{eso-pic} % used by \AddToShipoutPicture
- \RequirePackage{fancyhdr}
- \RequirePackage{natbib}
-
- % modification to natbib citations
- \setcitestyle{authoryear,round,citesep={;},aysep={,},yysep={;}}
-
- \renewcommand{\topfraction}{0.95}   % let figure take up nearly whole page
- \renewcommand{\textfraction}{0.05}  % let figure take up nearly whole page
-
- % Define iclrfinal, set to true if iclrfinalcopy is defined
- \newif\ificlrfinal
- \iclrfinalfalse
- \def\iclrfinalcopy{\iclrfinaltrue}
- \font\iclrtenhv  = phvb at 8pt
-
- % Specify the dimensions of each page
-
- \setlength{\paperheight}{11in}
- \setlength{\paperwidth}{8.5in}
-
-
- \oddsidemargin .5in    %   Note \oddsidemargin = \evensidemargin
- \evensidemargin .5in
- \marginparwidth 0.07 true in
- %\marginparwidth 0.75 true in
- %\topmargin 0 true pt           % Nominal distance from top of page to top of
- %\topmargin 0.125in
- \topmargin -0.625in
- \addtolength{\headsep}{0.25in}
- \textheight 9.0 true in       % Height of text (including footnotes & figures)
- \textwidth 5.5 true in        % Width of text line.
- \widowpenalty=10000
- \clubpenalty=10000
-
- % \thispagestyle{empty}        \pagestyle{empty}
- \flushbottom \sloppy
-
- % We're never going to need a table of contents, so just flush it to
- % save space --- suggested by drstrip@sandia-2
- \def\addcontentsline#1#2#3{}
-
- % Title stuff, taken from deproc.
- \def\maketitle{\par
- \begingroup
-    \def\thefootnote{\fnsymbol{footnote}}
-    \def\@makefnmark{\hbox to 0pt{$^{\@thefnmark}$\hss}} % for perfect author
-  % name centering
-    % The footnote-mark was overlapping the footnote-text,
-    % added the following to fix this problem               (MK)
-    \long\def\@makefntext##1{\parindent 1em\noindent
-                             \hbox to1.8em{\hss $\m@th ^{\@thefnmark}$}##1}
-    \@maketitle \@thanks
- \endgroup
- \setcounter{footnote}{0}
- \let\maketitle\relax \let\@maketitle\relax
- \gdef\@thanks{}\gdef\@author{}\gdef\@title{}\let\thanks\relax}
-
- % The toptitlebar has been raised to top-justify the first page
-
- \usepackage{fancyhdr}
- \pagestyle{fancy}
- \fancyhead{}
-
- % Title (includes both anonimized and non-anonimized versions)
- \def\@maketitle{\vbox{\hsize\textwidth
- %\linewidth\hsize \vskip 0.1in \toptitlebar \centering
- {\LARGE\sc \@title\par}
- %\bottomtitlebar % \vskip 0.1in %  minus
- \ificlrfinal
-    \lhead{Published as a conference paper at ICLR 2022}
-    \def\And{\end{tabular}\hfil\linebreak[0]\hfil
-             \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
-    \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
-             \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
-    \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\@author\end{tabular}%
- \else
-    \lhead{Under review as a conference paper at ICLR 2022}
-    \def\And{\end{tabular}\hfil\linebreak[0]\hfil
-             \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
-    \def\AND{\end{tabular}\hfil\linebreak[4]\hfil
-             \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}\ignorespaces}%
-    \begin{tabular}[t]{l}\bf\rule{\z@}{24pt}Anonymous authors\\Paper under double-blind review\end{tabular}%
- \fi
- \vskip 0.3in minus 0.1in}}
-
- \renewenvironment{abstract}{\vskip.075in\centerline{\large\sc
- Abstract}\vspace{0.5ex}\begin{quote}}{\par\end{quote}\vskip 1ex}
-
- % sections with less space
- \def\section{\@startsection {section}{1}{\z@}{-2.0ex plus
-     -0.5ex minus -.2ex}{1.5ex plus 0.3ex
- minus0.2ex}{\large\sc\raggedright}}
-
- \def\subsection{\@startsection{subsection}{2}{\z@}{-1.8ex plus
- -0.5ex minus -.2ex}{0.8ex plus .2ex}{\normalsize\sc\raggedright}}
- \def\subsubsection{\@startsection{subsubsection}{3}{\z@}{-1.5ex
- plus -0.5ex minus -.2ex}{0.5ex plus
- .2ex}{\normalsize\sc\raggedright}}
- \def\paragraph{\@startsection{paragraph}{4}{\z@}{1.5ex plus
- 0.5ex minus .2ex}{-1em}{\normalsize\bf}}
- \def\subparagraph{\@startsection{subparagraph}{5}{\z@}{1.5ex plus
- 0.5ex minus .2ex}{-1em}{\normalsize\sc}}
- \def\subsubsubsection{\vskip
- 5pt{\noindent\normalsize\rm\raggedright}}
-
-
- % Footnotes
- \footnotesep 6.65pt %
- \skip\footins 9pt plus 4pt minus 2pt
- \def\footnoterule{\kern-3pt \hrule width 12pc \kern 2.6pt }
- \setcounter{footnote}{0}
-
- % Lists and paragraphs
- \parindent 0pt
- \topsep 4pt plus 1pt minus 2pt
- \partopsep 1pt plus 0.5pt minus 0.5pt
- \itemsep 2pt plus 1pt minus 0.5pt
- \parsep 2pt plus 1pt minus 0.5pt
- \parskip .5pc
-
-
- %\leftmargin2em
- \leftmargin3pc
- \leftmargini\leftmargin \leftmarginii 2em
- \leftmarginiii 1.5em \leftmarginiv 1.0em \leftmarginv .5em
-
- %\labelsep \labelsep 5pt
-
- \def\@listi{\leftmargin\leftmargini}
- \def\@listii{\leftmargin\leftmarginii
-    \labelwidth\leftmarginii\advance\labelwidth-\labelsep
-    \topsep 2pt plus 1pt minus 0.5pt
-    \parsep 1pt plus 0.5pt minus 0.5pt
-    \itemsep \parsep}
- \def\@listiii{\leftmargin\leftmarginiii
-     \labelwidth\leftmarginiii\advance\labelwidth-\labelsep
-     \topsep 1pt plus 0.5pt minus 0.5pt
-     \parsep \z@ \partopsep 0.5pt plus 0pt minus 0.5pt
-     \itemsep \topsep}
- \def\@listiv{\leftmargin\leftmarginiv
-      \labelwidth\leftmarginiv\advance\labelwidth-\labelsep}
- \def\@listv{\leftmargin\leftmarginv
-      \labelwidth\leftmarginv\advance\labelwidth-\labelsep}
- \def\@listvi{\leftmargin\leftmarginvi
-      \labelwidth\leftmarginvi\advance\labelwidth-\labelsep}
-
- \abovedisplayskip 7pt plus2pt minus5pt%
- \belowdisplayskip \abovedisplayskip
- \abovedisplayshortskip  0pt plus3pt%
- \belowdisplayshortskip  4pt plus3pt minus3pt%
-
- % Less leading in most fonts (due to the narrow columns)
- % The choices were between 1-pt and 1.5-pt leading
- %\def\@normalsize{\@setsize\normalsize{11pt}\xpt\@xpt} % got rid of @ (MK)
- \def\normalsize{\@setsize\normalsize{11pt}\xpt\@xpt}
- \def\small{\@setsize\small{10pt}\ixpt\@ixpt}
- \def\footnotesize{\@setsize\footnotesize{10pt}\ixpt\@ixpt}
- \def\scriptsize{\@setsize\scriptsize{8pt}\viipt\@viipt}
- \def\tiny{\@setsize\tiny{7pt}\vipt\@vipt}
- \def\large{\@setsize\large{14pt}\xiipt\@xiipt}
- \def\Large{\@setsize\Large{16pt}\xivpt\@xivpt}
- \def\LARGE{\@setsize\LARGE{20pt}\xviipt\@xviipt}
- \def\huge{\@setsize\huge{23pt}\xxpt\@xxpt}
- \def\Huge{\@setsize\Huge{28pt}\xxvpt\@xxvpt}
-
- \def\toptitlebar{\hrule height4pt\vskip .25in\vskip-\parskip}
-
- \def\bottomtitlebar{\vskip .29in\vskip-\parskip\hrule height1pt\vskip
- .09in} %
- %Reduced second vskip to compensate for adding the strut in \@author
-
-
- %% % Vertical Ruler
- %% % This code is, largely, from the CVPR 2010 conference style file
- %% % ----- define vruler
- %% \makeatletter
- %% \newbox\iclrrulerbox
- %% \newcount\iclrrulercount
- %% \newdimen\iclrruleroffset
- %% \newdimen\cv@lineheight
- %% \newdimen\cv@boxheight
- %% \newbox\cv@tmpbox
- %% \newcount\cv@refno
- %% \newcount\cv@tot
- %% % NUMBER with left flushed zeros  \fillzeros[<WIDTH>]<NUMBER>
- %% \newcount\cv@tmpc@ \newcount\cv@tmpc
- %% \def\fillzeros[#1]#2{\cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi
- %% \cv@tmpc=1 %
- %% \loop\ifnum\cv@tmpc@<10 \else \divide\cv@tmpc@ by 10 \advance\cv@tmpc by 1 \fi
- %%   \ifnum\cv@tmpc@=10\relax\cv@tmpc@=11\relax\fi \ifnum\cv@tmpc@>10 \repeat
- %% \ifnum#2<0\advance\cv@tmpc1\relax-\fi
- %% \loop\ifnum\cv@tmpc<#1\relax0\advance\cv@tmpc1\relax\fi \ifnum\cv@tmpc<#1 \repeat
- %% \cv@tmpc@=#2\relax\ifnum\cv@tmpc@<0\cv@tmpc@=-\cv@tmpc@\fi \relax\the\cv@tmpc@}%
- %% % \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
- %% \def\makevruler[#1][#2][#3][#4][#5]{\begingroup\offinterlineskip
- %% \textheight=#5\vbadness=10000\vfuzz=120ex\overfullrule=0pt%
- %% \global\setbox\iclrrulerbox=\vbox to \textheight{%
- %% {\parskip=0pt\hfuzz=150em\cv@boxheight=\textheight
- %% \cv@lineheight=#1\global\iclrrulercount=#2%
- %% \cv@tot\cv@boxheight\divide\cv@tot\cv@lineheight\advance\cv@tot2%
- %% \cv@refno1\vskip-\cv@lineheight\vskip1ex%
- %% \loop\setbox\cv@tmpbox=\hbox to0cm{{\iclrtenhv\hfil\fillzeros[#4]\iclrrulercount}}%
- %% \ht\cv@tmpbox\cv@lineheight\dp\cv@tmpbox0pt\box\cv@tmpbox\break
- %% \advance\cv@refno1\global\advance\iclrrulercount#3\relax
- %% \ifnum\cv@refno<\cv@tot\repeat}}\endgroup}%
- %% \makeatother
- %% % ----- end of vruler
-
- %% % \makevruler[<SCALE>][<INITIAL_COUNT>][<STEP>][<DIGITS>][<HEIGHT>]
- %% \def\iclrruler#1{\makevruler[12pt][#1][1][3][0.993\textheight]\usebox{\iclrrulerbox}}
- %% \AddToShipoutPicture{%
- %% \ificlrfinal\else
- %% \iclrruleroffset=\textheight
- %% \advance\iclrruleroffset by -3.7pt
- %% \color[rgb]{.7,.7,.7}
- %% \AtTextUpperLeft{%
- %% \put(\LenToUnit{-35pt},\LenToUnit{-\iclrruleroffset}){%left ruler
- %% \iclrruler{\iclrrulercount}}
- %% }
- %% \fi
- %% }
- %%% To add a vertical bar on the side
- %\AddToShipoutPicture{
- %\AtTextLowerLeft{
- %\hspace*{-1.8cm}
- %\colorbox[rgb]{0.7,0.7,0.7}{\small \parbox[b][\textheight]{0.1cm}{}}}
- %}
outputs/outputs_20230421_012207/introduction.tex DELETED
@@ -1,10 +0,0 @@
- \section{introduction}
- Reinforcement Learning (RL) has emerged as a powerful learning paradigm for solving sequential decision-making problems, with significant advancements made in recent years due to the integration of deep neural networks \cite{2108.11510}. As a result, deep reinforcement learning has demonstrated remarkable success in various domains, including finance, medicine, healthcare, video games, robotics, and computer vision \cite{2108.11510}. However, traditional RL paradigms face challenges in modeling lifelong learning systems, which learn through trial-and-error interactions with the environment over their lifetime \cite{2001.09608}. Moreover, data inefficiency caused by trial-and-error learning mechanisms makes deep RL difficult to apply in a wide range of areas \cite{2212.00253}. This survey aims to address these challenges by exploring recent advancements in reinforcement learning, focusing on the development of more efficient and effective learning algorithms.
-
- The problem we address is the development of more efficient and effective reinforcement learning algorithms that can learn from trial-and-error interactions with the environment, while also being able to transfer knowledge from external expertise to facilitate the learning process \cite{2009.07888}. Our proposed solution involves investigating recent advancements in RL, such as deep RL in computer vision \cite{2108.11510}, group-agent reinforcement learning \cite{2202.05135}, and distributed deep reinforcement learning \cite{2212.00253}. We aim to answer the following research questions: (1) How can we improve the efficiency and effectiveness of reinforcement learning algorithms? (2) What are the key advancements in RL that can be leveraged to address the challenges faced by traditional RL paradigms?
-
- Related work in the field of reinforcement learning includes the development of algorithms such as Q-learning, Double Q-learning, and Dueling Q-learning \cite{2106.14642, 2106.01134, 2012.01100}. Additionally, transfer learning approaches have been explored to tackle various challenges faced by RL, by transferring knowledge from external expertise to facilitate the learning process \cite{2009.07888}. Furthermore, recent research has focused on the development of distributed deep RL algorithms, which have shown potential in various applications such as human-computer gaming and intelligent transportation \cite{2212.00253}.
-
- Our work differs from the existing literature in that we aim to provide a comprehensive survey of the recent advancements in reinforcement learning, focusing on the development of more efficient and effective learning algorithms. By investigating various RL techniques and methodologies, we hope to identify key advancements that can be leveraged to address the challenges faced by traditional RL paradigms. Moreover, our survey will not only discuss the algorithms themselves but also explore their applications in various domains, providing a more in-depth understanding of the potential impact of these advancements on the AI community.
-
- In conclusion, this survey will provide a detailed overview of recent advancements in reinforcement learning, with a focus on addressing the challenges faced by traditional RL paradigms and improving the efficiency and effectiveness of learning algorithms. By investigating various RL techniques and methodologies, we aim to identify key advancements that can be leveraged to address these challenges and contribute to the ongoing development of reinforcement learning as a powerful learning paradigm for solving sequential decision-making problems in various domains.
outputs/outputs_20230421_012207/main.aux DELETED
@@ -1,79 +0,0 @@
- \relax
- \providecommand\hyper@newdestlabel[2]{}
- \providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
- \HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
- \global\let\oldcontentsline\contentsline
- \gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
- \global\let\oldnewlabel\newlabel
- \gdef\newlabel#1#2{\newlabelxx{#1}#2}
- \gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
- \AtEndDocument{\ifx\hyper@anchor\@undefined
- \let\contentsline\oldcontentsline
- \let\newlabel\oldnewlabel
- \fi}
- \fi}
- \global\let\hyper@last\relax
- \gdef\HyperFirstAtBeginDocument#1{#1}
- \providecommand\HyField@AuxAddToFields[1]{}
- \providecommand\HyField@AuxAddToCoFields[2]{}
- \citation{2108.11510}
- \citation{2108.11510}
- \citation{2001.09608}
- \citation{2212.00253}
- \citation{2009.07888}
- \citation{2108.11510}
- \citation{2202.05135}
- \citation{2212.00253}
- \citation{2106.14642}
- \citation{2106.01134}
- \citation{2012.01100}
- \citation{2009.07888}
- \citation{2212.00253}
- \@writefile{toc}{\contentsline {section}{\numberline {1}introduction}{1}{section.1}\protected@file@percent }
- \citation{2009.07888}
- \citation{2012.01100}
- \citation{2303.08631}
- \citation{2012.01100}
- \citation{2108.11510}
- \citation{2108.11510}
- \citation{2212.00253}
- \citation{2009.07888}
- \citation{2009.07888}
- \citation{2209.01820}
- \citation{1811.09013}
- \@writefile{toc}{\contentsline {section}{\numberline {2}related works}{2}{section.2}\protected@file@percent }
- \@writefile{toc}{\contentsline {paragraph}{Reinforcement Learning and Q-Learning}{2}{section*.1}\protected@file@percent }
- \@writefile{toc}{\contentsline {paragraph}{Deep Reinforcement Learning}{2}{section*.2}\protected@file@percent }
- \@writefile{toc}{\contentsline {paragraph}{Transfer Learning in Reinforcement Learning}{2}{section*.3}\protected@file@percent }
- \citation{2202.05135}
- \citation{2202.05135}
- \citation{2001.09608}
- \citation{2108.11510}
- \citation{2303.08631}
- \citation{2106.01134}
- \citation{2108.11510}
- \citation{2108.11510}
- \citation{2202.05135}
- \@writefile{toc}{\contentsline {paragraph}{Policy Gradient Methods}{3}{section*.4}\protected@file@percent }
- \@writefile{toc}{\contentsline {paragraph}{Group-Agent Reinforcement Learning}{3}{section*.5}\protected@file@percent }
- \@writefile{toc}{\contentsline {section}{\numberline {3}backgrounds}{3}{section.3}\protected@file@percent }
- \citation{1911.09048}
- \citation{1703.02102}
- \citation{1811.09013}
- \citation{2001.09608}
- \citation{2108.11510}
- \bibdata{ref}
- \bibcite{2303.08631}{1}
- \bibcite{1811.09013}{2}
- \bibcite{2202.05135}{3}
- \bibcite{2001.09608}{4}
- \bibcite{2106.14642}{5}
- \bibcite{2108.11510}{6}
- \bibcite{2212.00253}{7}
- \bibcite{2012.01100}{8}
- \bibcite{1911.09048}{9}
- \bibcite{2209.01820}{10}
- \bibcite{2106.01134}{11}
- \bibcite{1703.02102}{12}
- \bibcite{2009.07888}{13}
- \bibstyle{abbrv}
outputs/outputs_20230421_012207/main.bbl DELETED
@@ -1,72 +0,0 @@
- \begin{thebibliography}{10}
-
- \bibitem{2303.08631}
- D.~Barber.
- \newblock Smoothed q-learning.
- \newblock {\em arXiv preprint arXiv:2303.08631}, 2023.
-
- \bibitem{1811.09013}
- M.~W. Ehsan~Imani, Eric~Graves.
- \newblock An off-policy policy gradient theorem using emphatic weightings.
- \newblock {\em arXiv preprint arXiv:1811.09013}, 2018.
-
- \bibitem{2202.05135}
- X.-J.~Z. Kaiyue~Wu.
- \newblock Group-agent reinforcement learning.
- \newblock {\em arXiv preprint arXiv:2202.05135}, 2022.
-
- \bibitem{2001.09608}
- C.~Li.
- \newblock Some insights into lifelong reinforcement learning systems.
- \newblock {\em arXiv preprint arXiv:2001.09608}, 2020.
-
- \bibitem{2106.14642}
- M.~G. P.~E. Li~Meng, Anis~Yazidi.
- \newblock Expert q-learning: Deep reinforcement learning with coarse state
-   values from offline expert examples.
- \newblock {\em arXiv preprint arXiv:2106.14642}, 2021.
-
- \bibitem{2108.11510}
- K.~Y. K. L. M.~S. Ngan~Le, Vidhiwar Singh~Rathour.
- \newblock Deep reinforcement learning in computer vision: A comprehensive
-   survey.
- \newblock {\em arXiv preprint arXiv:2108.11510}, 2021.
-
- \bibitem{2212.00253}
- S.~S. J. Y. M. Z. K. H. B. L. L.~W. Qiyue~Yin, Tongtong~Yu.
- \newblock Distributed deep reinforcement learning: A survey and a multi-player
-   multi-agent learning toolbox.
- \newblock {\em arXiv preprint arXiv:2212.00253}, 2022.
-
- \bibitem{2012.01100}
- M.~R. Rong~Zhu.
- \newblock Self-correcting q-learning.
- \newblock {\em arXiv preprint arXiv:2012.01100}, 2020.
-
- \bibitem{1911.09048}
- J.~Schmidt.
- \newblock Morphisms of networks of hybrid open systems.
- \newblock {\em arXiv preprint arXiv:1911.09048}, 2019.
-
- \bibitem{2209.01820}
- W.~J.~A. van Heeswijk.
- \newblock Natural policy gradients in reinforcement learning explained.
- \newblock {\em arXiv preprint arXiv:2209.01820}, 2022.
-
- \bibitem{2106.01134}
- J.~L. Wei~Liao, Xiaohui~Wei.
- \newblock Smooth q-learning: Accelerate convergence of q-learning using
-   similarity.
- \newblock {\em arXiv preprint arXiv:2106.01134}, 2021.
-
- \bibitem{1703.02102}
- V.~K. Yemi~Okesanjo.
- \newblock Revisiting stochastic off-policy action-value gradients.
- \newblock {\em arXiv preprint arXiv:1703.02102}, 2017.
-
- \bibitem{2009.07888}
- A.~K. J. J.~Z. Zhuangdi~Zhu, Kaixiang~Lin.
- \newblock Transfer learning in deep reinforcement learning: A survey.
- \newblock {\em arXiv preprint arXiv:2009.07888}, 2020.
-
- \end{thebibliography}