pszemraj committed on
Commit
87e5c9c
•
1 Parent(s): 701f4ca

🎉 init from previous scripts

Browse files

Signed-off-by: peter szemraj <peterszemraj@gmail.com>

.gitignore ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # logs
2
+ *.log
3
+ *LOGFILE*
4
+
5
+ # output files need to be force-added
6
+ *.csv
7
+ *.png
8
+ *.jpg
9
+ *.jpeg
10
+ *.pkl
11
+ *.xlsx
12
+ *.txt
13
+
14
+ # cache
15
+ *__pycache__/
16
+ *.pyc
17
+
18
+ # reports folder - need to be force-added
19
+ *reports/
20
+
21
+ # scratch files and folders
22
+
23
+ *scratch*
24
+ *scratch/
25
+
26
+ # notebooks
27
+
28
+ *notebooks/
29
+ *.ipynb
app.py ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import time
3
+ from pathlib import Path
4
+
5
+ import gradio as gr
6
+ import nltk
7
+ from cleantext import clean
8
+
9
+ from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
10
+ from utils import load_example_filenames, truncate_word_count
11
+
12
+ _here = Path(__file__).parent
13
+
14
+ nltk.download("stopwords") # TODO=find where this requirement originates from
15
+
16
+ logging.basicConfig(
17
+ level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
18
+ )
19
+
20
+
21
def proc_submission(
    input_text: str,
    model_size: str,
    num_beams,
    token_batch_length,
    length_penalty,
    repetition_penalty,
    no_repeat_ngram_size,
    max_input_length: int = 768,
):
    """
    proc_submission - a helper function for the gradio module to process submissions

    Args:
        input_text (str): the input text to summarize
        model_size (str): the size of the model to use ("base" selects the small model,
            anything else selects the large one)
        num_beams (int): the number of beams to use
        token_batch_length (int): the length of the token batches to use
        length_penalty (float): the length penalty to use
        repetition_penalty (float): the repetition penalty to use
        no_repeat_ngram_size (int): the no repeat ngram size to use
        max_input_length (int, optional): the maximum input length (in words) to use.
            Defaults to 768. Overridden with 2048 when the base model is selected.

    Returns:
        tuple of three str: an HTML status snippet (runtime plus any truncation
        warning), the per-section summaries joined by newlines, and the per-section
        summary scores joined by newlines
    """

    use_base = model_size == "base"
    # UI widgets may deliver numbers in a loose form; normalise once up front
    batch_length = int(token_batch_length)
    settings = {
        "length_penalty": float(length_penalty),
        "repetition_penalty": float(repetition_penalty),
        "no_repeat_ngram_size": int(no_repeat_ngram_size),
        "encoder_no_repeat_ngram_size": 4,
        "num_beams": int(num_beams),
        "min_length": 4,
        # each batch's summary is capped at a quarter of the batch length
        "max_length": batch_length // 4,
        "early_stopping": True,
        "do_sample": False,
    }
    st = time.perf_counter()
    clean_text = clean(input_text, lower=False)
    max_input_length = 2048 if use_base else max_input_length
    processed = truncate_word_count(clean_text, max_input_length)

    if processed["was_truncated"]:
        tr_in = processed["truncated_text"]
        msg = f"Input text was truncated to {max_input_length} words (based on whitespace)"
        logging.warning(msg)
    else:
        # summarize the cleaned text; the raw input would discard the cleaning step
        tr_in = clean_text
        msg = None

    # `model`/`tokenizer` (large) and `model_sm`/`tokenizer_sm` (base) are module-level
    # globals created in the `__main__` section of this file
    _summaries = summarize_via_tokenbatches(
        tr_in,
        model_sm if use_base else model,
        tokenizer_sm if use_base else tokenizer,
        batch_length=batch_length,
        **settings,
    )
    sum_text = [f"Section {i}: " + s["summary"][0] for i, s in enumerate(_summaries)]
    sum_scores = [
        f" - Section {i}: {round(s['summary_score'],4)}"
        for i, s in enumerate(_summaries)
    ]

    sum_text_out = "\n".join(sum_text)
    scores_out = "\n".join(sum_scores)
    rt = round((time.perf_counter() - st) / 60, 2)
    logging.info(f"Runtime: {rt} minutes")

    html = f"<p>Runtime: {rt} minutes on CPU</p>"
    if msg is not None:
        html += f"<h2>WARNING:</h2><hr><b>{msg}</b><br><br>"

    return html, sum_text_out, scores_out
100
+
101
+
102
def load_single_example_text(
    example_path: str,
):
    """
    load_single_example_text - a helper function for the gradio module to load an example

    Args:
        example_path (str): the example's name as shown in the dropdown; it is used
            as a key into the module-level `name_to_path` mapping

    Returns:
        str, the cleaned text of the example, or an error message when no example
        has been selected
    """
    # the dropdown yields an empty value until the user picks an entry
    if not example_path:
        logging.warning("Load Example requested without a selected example")
        return "Error: No example selected. Choose an example from the dropdown first."

    # `name_to_path` is only read here, so no `global` declaration is needed
    full_ex_path = Path(name_to_path[example_path])
    with open(full_ex_path, "r", encoding="utf-8", errors="ignore") as f:
        raw_text = f.read()
    return clean(raw_text, lower=False)
118
+
119
+
120
def load_uploaded_file(file_obj):
    """
    load_uploaded_file - process an uploaded file

    Args:
        file_obj (POTENTIALLY list): Gradio file object, possibly inside a list.
            A plain path string is accepted as well; None or an empty list means
            nothing was uploaded.

    Returns:
        str, the cleaned contents of the uploaded file, or an error message if no
        file was provided or it could not be read
    """

    # the file component may hand over a list; only the first entry is used
    if isinstance(file_obj, list):
        file_obj = file_obj[0] if file_obj else None

    # the button can be pressed before anything has been uploaded
    if file_obj is None:
        logging.warning("Load Uploaded File requested but no file was uploaded")
        return "Error: No file uploaded. Upload a text file first."

    # file objects expose the on-disk location via `.name`; fall back to the value itself
    file_path = Path(getattr(file_obj, "name", file_obj))
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            raw_text = f.read()
        return clean(raw_text, lower=False)
    except Exception as e:
        # UI boundary: report the failure in the textbox instead of crashing the handler
        logging.error(f"Trying to load file with path {file_path}, error: {e}")
        return "Error: Could not read file. Ensure that it is a valid text file with encoding UTF-8."
145
+
146
+
147
if __name__ == "__main__":
    # Script entry point: load both checkpoints, build the Gradio Blocks UI and launch it.
    # NOTE(review): block nesting below is reconstructed from syntax (the original
    # indentation was not preserved) - confirm the layout against the running app.

    # module-level globals: proc_submission picks between these based on `model_size`
    model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
    model_sm, tokenizer_sm = load_model_and_tokenizer("pszemraj/led-base-book-summary")

    # mapping of example name -> file path, read by load_single_example_text
    name_to_path = load_example_filenames(_here / "examples")
    logging.info(f"Loaded {len(name_to_path)} examples")
    demo = gr.Blocks()

    with demo:

        gr.Markdown("# Long-Form Summarization: LED & BookSum")
        gr.Markdown(
            "A simple demo using a fine-tuned LED model to summarize long-form text. See [model card](https://huggingface.co/pszemraj/led-large-book-summary) for a notebook with GPU inference (much faster) on Colab."
        )
        with gr.Column():

            gr.Markdown("## Load Inputs & Select Parameters")
            gr.Markdown(
                "Enter text below in the text area. The text will be summarized [using the selected parameters](https://huggingface.co/blog/how-to-generate). Optionally load an example below or upload a file."
            )
            with gr.Row():
                model_size = gr.Radio(
                    choices=["base", "large"], label="Model Variant", value="large"
                )
                num_beams = gr.Radio(
                    choices=[2, 3, 4],
                    label="Beam Search: # of Beams",
                    value=2,
                )
            gr.Markdown(
                "_The base model is less performant than the large model, but is faster and will accept up to 2048 words per input (Large model accepts up to 768)._"
            )
            with gr.Row():
                # gr.Slider(value=...) replaces the deprecated gr.inputs.Slider(default=...)
                length_penalty = gr.Slider(
                    minimum=0.5,
                    maximum=1.0,
                    label="length penalty",
                    value=0.7,
                    step=0.05,
                )
                token_batch_length = gr.Radio(
                    choices=[512, 768, 1024],
                    label="token batch length",
                    value=512,
                )

            with gr.Row():
                repetition_penalty = gr.Slider(
                    minimum=1.0,
                    maximum=5.0,
                    label="repetition penalty",
                    value=3.5,
                    step=0.1,
                )
                no_repeat_ngram_size = gr.Radio(
                    choices=[2, 3, 4],
                    label="no repeat ngram size",
                    value=3,
                )
            with gr.Row():
                example_name = gr.Dropdown(
                    list(name_to_path.keys()),
                    label="Choose an Example",
                )
                load_examples_button = gr.Button(
                    "Load Example",
                )
            input_text = gr.Textbox(
                lines=6,
                label="Input Text (for summarization)",
                placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
            )
            gr.Markdown("Upload your own file:")
            with gr.Row():
                uploaded_file = gr.File(
                    label="Upload a text file",
                    file_count="single",
                    type="file",
                )
                load_file_button = gr.Button("Load Uploaded File")

        gr.Markdown("---")

        with gr.Column():
            gr.Markdown("## Generate Summary")
            gr.Markdown(
                "Summary generation should take approximately 1-2 minutes for most settings."
            )
            summarize_button = gr.Button(
                "Summarize!",
                variant="primary",
            )

            output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
            gr.Markdown("### Summary Output")
            summary_text = gr.Textbox(
                label="Summary", placeholder="The generated summary will appear here"
            )
            gr.Markdown(
                "The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
            )
            summary_scores = gr.Textbox(
                label="Summary Scores", placeholder="Summary scores will appear here"
            )

        gr.Markdown("---")

        with gr.Column():
            gr.Markdown("## About the Model")
            gr.Markdown(
                "- [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned checkpoint of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209).The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
            )
            gr.Markdown(
                "- The two most important parameters-empirically-are the `num_beams` and `token_batch_length`. However, increasing these will also increase the amount of time it takes to generate a summary. The `length_penalty` and `repetition_penalty` parameters are also important for the model to generate good summaries."
            )
            gr.Markdown(
                "- The model can be used with tag [pszemraj/led-large-book-summary](https://huggingface.co/pszemraj/led-large-book-summary). See the model card for details on usage & a notebook for a tutorial."
            )
            gr.Markdown("---")

        # wire the buttons to their handlers
        load_examples_button.click(
            fn=load_single_example_text, inputs=[example_name], outputs=[input_text]
        )

        load_file_button.click(
            fn=load_uploaded_file, inputs=uploaded_file, outputs=[input_text]
        )

        # the order of `inputs` must match proc_submission's positional parameters
        summarize_button.click(
            fn=proc_submission,
            inputs=[
                input_text,
                model_size,
                num_beams,
                token_batch_length,
                length_penalty,
                repetition_penalty,
                no_repeat_ngram_size,
            ],
            outputs=[output_text, summary_text, summary_scores],
        )

    demo.launch(enable_queue=True, share=True)
examples/AM Turing - The Imitation Game.txt ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ A. M. Turing (1950) Computing Machinery and Intelligence. Mind 59: 433-460.
2
+
3
+
4
+
5
+
6
+
7
+ COMPUTING MACHINERY AND INTELLIGENCE
8
+
9
+
10
+
11
+
12
+ By A. M. Turing
13
+
14
+
15
+
16
+
17
+
18
+ 1. The Imitation Game
19
+
20
+
21
+ I propose to consider the question, "Can machines think?" This should begin with definitions of the meaning of the terms "machine" and "think." The definitions might be framed so as to reflect so far as possible the normal use of the words, but this attitude is dangerous. If the meaning of the words "machine" and "think" are to be found by examining how they are commonly used it is difficult to escape the conclusion that the meaning and the answer to the question, "Can machines think?" is to be sought in a statistical survey such as a Gallup poll. But this is absurd. Instead of attempting such a definition I shall replace the question by another, which is closely related to it and is expressed in relatively unambiguous words.
22
+
23
+ The new form of the problem can be described in terms of a game which we call the
24
+
25
+ "imitation game." It is played with three people, a man (A), a woman (B), and an interrogator (C) who may be of either sex. The interrogator stays in a room apart from the other two. The object of the game for the interrogator is to determine which of the other two is the man and which is the woman. He knows them by labels X and Y, and at the end of the game he says either "X is A and Y is B" or "X is B and Y is A." The interrogator is allowed to put questions to A and B thus: C: Will X please tell me the length of his or her hair?
26
+
27
+ Now suppose X is actually A, then A must answer. It is A's object in the game to try and cause C to make the wrong identification. His answer might therefore be:
28
+
29
+ "My hair is shingled, and the longest strands are about nine inches long."
30
+
31
+ In order that tones of voice may not help the interrogator the answers should be written, or better still, typewritten. The ideal arrangement is to have a teleprinter communicating between the two rooms. Alternatively the question and answers can be repeated by an intermediary. The object of the game for the third player (B) is to help the interrogator.
32
+
33
+ The best strategy for her is probably to give truthful answers. She can add such things as
34
+
35
+ "I am the woman, don't listen to him!" to her answers, but it will avail nothing as the man can make similar remarks.
36
+
37
+ We now ask the question, "What will happen when a machine takes the part of A in this game?" Will the interrogator decide wrongly as often when the game is played like this as he does when the game is played between a man and a woman? These questions replace our original, "Can machines think?"
38
+
39
+ 2. Critique of the New Problem
40
+
41
+ As well as asking, "What is the answer to this new form of the question," one may ask,
42
+
43
+ "Is this new question a worthy one to investigate?" This latter question we investigate without further ado, thereby cutting short an infinite regress.
44
+
45
+ The new problem has the advantage of drawing a fairly sharp line between the physical and the intellectual capacities of a man. No engineer or chemist claims to be able to produce a material which is indistinguishable from the human skin. It is possible that at some time this might be done, but even supposing this invention available we should feel there was little point in trying to make a "thinking machine" more human by dressing it up in such artificial flesh. The form in which we have set the problem reflects this fact in the condition which prevents the interrogator from seeing or touching the other competitors, or hearing their voices. Some other advantages of the proposed criterion may be shown up by specimen questions and answers. Thus: Q: Please write me a sonnet on the subject of the Forth Bridge.
46
+
47
+ A : Count me out on this one. I never could write poetry.
48
+
49
+ Q: Add 34957 to 70764.
50
+
51
+ A: (Pause about 30 seconds and then give as answer) 105621.
52
+
53
+ Q: Do you play chess?
54
+
55
+ A: Yes.
56
+
57
+ Q: I have K at my K1, and no other pieces. You have only K at K6 and R at R1. It is your move. What do you play?
58
+
59
+ A: (After a pause of 15 seconds) R-R8 mate.
60
+
61
+ The question and answer method seems to be suitable for introducing almost any one of the fields of human endeavour that we wish to include. We do not wish to penalise the machine for its inability to shine in beauty competitions, nor to penalise a man for losing in a race against an aeroplane. The conditions of our game make these disabilities irrelevant. The "witnesses" can brag, if they consider it advisable, as much as they please about their charms, strength or heroism, but the interrogator cannot demand practical demonstrations.
62
+
63
+ The game may perhaps be criticised on the ground that the odds are weighted too heavily against the machine. If the man were to try and pretend to be the machine he would clearly make a very poor showing. He would be given away at once by slowness and inaccuracy in arithmetic. May not machines carry out something which ought to be described as thinking but which is very different from what a man does? This objection is
64
+
65
+ a very strong one, but at least we can say that if, nevertheless, a machine can be constructed to play the imitation game satisfactorily, we need not be troubled by this objection.
66
+
67
+ It might be urged that when playing the "imitation game" the best strategy for the machine may possibly be something other than imitation of the behaviour of a man. This may be, but I think it is unlikely that there is any great effect of this kind. In any case there is no intention to investigate here the theory of the game, and it will be assumed that the best strategy is to try to provide answers that would naturally be given by a man.
68
+
69
+ 3. The Machines Concerned in the Game
70
+
71
+ The question which we put in 1 will not be quite definite until we have specified what we mean by the word "machine." It is natural that we should wish to permit every kind of engineering technique to be used in our machines. We also wish to allow the possibility than an engineer or team of engineers may construct a machine which works, but whose manner of operation cannot be satisfactorily described by its constructors because they have applied a method which is largely experimental. Finally, we wish to exclude from the machines men born in the usual manner. It is difficult to frame the definitions so as to satisfy these three conditions. One might for instance insist that the team of engineers should be all of one sex, but this would not really be satisfactory, for it is probably possible to rear a complete individual from a single cell of the skin (say) of a man. To do so would be a feat of biological technique deserving of the very highest praise, but we would not be inclined to regard it as a case of "constructing a thinking machine." This prompts us to abandon the requirement that every kind of technique should be permitted.
72
+
73
+ We are the more ready to do so in view of the fact that the present interest in "thinking machines" has been aroused by a particular kind of machine, usually called an "electronic computer" or "digital computer." Following this suggestion we only permit digital computers to take part in our game.
74
+
75
+ This restriction appears at first sight to be a very drastic one. I shall attempt to show that it is not so in reality. To do this necessitates a short account of the nature and properties of these computers.
76
+
77
+ It may also be said that this identification of machines with digital computers, like our criterion for "thinking," will only be unsatisfactory if (contrary to my belief), it turns out that digital computers are unable to give a good showing in the game.
78
+
79
+ There are already a number of digital computers in working order, and it may be asked,
80
+
81
+ "Why not try the experiment straight away? It would be easy to satisfy the conditions of the game. A number of interrogators could be used, and statistics compiled to show how often the right identification was given." The short answer is that we are not asking whether all digital computers would do well in the game nor whether the computers at present available would do well, but whether there are imaginable computers which would do well. But this is only the short answer. We shall see this question in a different light later.
82
+
83
+ 4. Digital Computers
84
+
85
+ The idea behind digital computers may be explained by saying that these machines are intended to carry out any operations which could be done by a human computer. The human computer is supposed to be following fixed rules; he has no authority to deviate from them in any detail. We may suppose that these rules are supplied in a book, which is altered whenever he is put on to a new job. He has also an unlimited supply of paper on which he does his calculations. He may also do his multiplications and additions on a
86
+
87
+ "desk machine," but this is not important.
88
+
89
+ If we use the above explanation as a definition we shall be in danger of circularity of argument. We avoid this by giving an outline of the means by which the desired effect is achieved. A digital computer can usually be regarded as consisting of three parts: (i) Store.
90
+
91
+ (ii) Executive unit.
92
+
93
+ (iii) Control.
94
+
95
+ The store is a store of information, and corresponds to the human computer's paper, whether this is the paper on which he does his calculations or that on which his book of rules is printed. In so far as the human computer does calculations in his head a part of the store will correspond to his memory.
96
+
97
+ The executive unit is the part which carries out the various individual operations involved in a calculation. What these individual operations are will vary from machine to machine.
98
+
99
+ Usually fairly lengthy operations can be done such as "Multiply 3540675445 by 7076345687" but in some machines only very simple ones such as "Write down 0" are possible.
100
+
101
+ We have mentioned that the "book of rules" supplied to the computer is replaced in the machine by a part of the store. It is then called the "table of instructions." It is the duty of the control to see that these instructions are obeyed correctly and in the right order. The control is so constructed that this necessarily happens.
102
+
103
+ The information in the store is usually broken up into packets of moderately small size. In one machine, for instance, a packet might consist of ten decimal digits. Numbers are assigned to the parts of the store in which the various packets of information are stored, in some systematic manner. A typical instruction might say-
104
+
105
+ "Add the number stored in position 6809 to that in 4302 and put the result back into the latter storage position."
106
+
107
+ Needless to say it would not occur in the machine expressed in English. It would more likely be coded in a form such as 6809430217. Here 17 says which of various possible
108
+
109
+ operations is to be performed on the two numbers. In this case the operation is that described above, viz., "Add the number. . . ." It will be noticed that the instruction takes up 10 digits and so forms one packet of information, very conveniently. The control will normally take the instructions to be obeyed in the order of the positions in which they are stored, but occasionally an instruction such as
110
+
111
+ "Now obey the instruction stored in position 5606, and continue from there"
112
+
113
+ may be encountered, or again
114
+
115
+ "If position 4505 contains 0 obey next the instruction stored in 6707, otherwise continue straight on."
116
+
117
+ Instructions of these latter types are very important because they make it possible for a sequence of operations to be replaced over and over again until some condition is fulfilled, but in doing so to obey, not fresh instructions on each repetition, but the same ones over and over again. To take a domestic analogy. Suppose Mother wants Tommy to call at the cobbler's every morning on his way to school to see if her shoes are done, she can ask him afresh every morning. Alternatively she can stick up a notice once and for all in the hall which he will see when he leaves for school and which tells him to call for the shoes, and also to destroy the notice when he comes back if he has the shoes with him.
118
+
119
+ The reader must accept it as a fact that digital computers can be constructed, and indeed have been constructed, according to the principles we have described, and that they can in fact mimic the actions of a human computer very closely.
120
+
121
+ The book of rules which we have described our human computer as using is of course a convenient fiction. Actual human computers really remember what they have got to do. If one wants to make a machine mimic the behaviour of the human computer in some complex operation one has to ask him how it is done, and then translate the answer into the form of an instruction table. Constructing instruction tables is usually described as
122
+
123
+ "programming." To "programme a machine to carry out the operation A" means to put the appropriate instruction table into the machine so that it will do A.
124
+
125
+ An interesting variant on the idea of a digital computer is a "digital computer with a random element." These have instructions involving the throwing of a die or some equivalent electronic process; one such instruction might for instance be, "Throw the die and put the resulting number into store 1000." Sometimes such a machine is described as having free will (though I would not use this phrase myself). It is not normally possible to determine from observing a machine whether it has a random element, for a similar effect can be produced by such devices as making the choices depend on the digits of the decimal for π.
126
+
127
+ Most actual digital computers have only a finite store. There is no theoretical difficulty in the idea of a computer with an unlimited store. Of course only a finite part can have been used at any one time. Likewise only a finite amount can have been constructed, but we
128
+
129
+ can imagine more and more being added as required. Such computers have special theoretical interest and will be called infinitive capacity computers.
130
+
131
+ The idea of a digital computer is an old one. Charles Babbage, Lucasian Professor of Mathematics at Cambridge from 1828 to 1839, planned such a machine, called the Analytical Engine, but it was never completed. Although Babbage had all the essential ideas, his machine was not at that time such a very attractive prospect. The speed which would have been available would be definitely faster than a human computer but something like 100 times slower than the Manchester machine, itself one of the slower of the modern machines. The storage was to be purely mechanical, using wheels and cards.
132
+
133
+ The fact that Babbage's Analytical Engine was to be entirely mechanical will help us to rid ourselves of a superstition. Importance is often attached to the fact that modern digital computers are electrical, and that the nervous system also is electrical. Since Babbage's machine was not electrical, and since all digital computers are in a sense equivalent, we see that this use of electricity cannot be of theoretical importance. Of course electricity usually comes in where fast signalling is concerned, so that it is not surprising that we find it in both these connections. In the nervous system chemical phenomena are at least as important as electrical. In certain computers the storage system is mainly acoustic. The feature of using electricity is thus seen to be only a very superficial similarity. If we wish to find such similarities we should look rather for mathematical analogies of function.
134
+
135
+
136
+
137
+
138
+
139
+ 5. Universality of Digital Computers
140
+
141
+
142
+ The digital computers considered in the last section may be classified amongst the
143
+
144
+ "discrete-state machines." These are the machines which move by sudden jumps or clicks from one quite definite state to another. These states are sufficiently different for the possibility of confusion between them to be ignored. Strictly speaking, there are no such machines. Everything really moves continuously. But there are many kinds of machine which can profitably be thought of as being discrete-state machines. For instance in considering the switches for a lighting system it is a convenient fiction that each switch must be definitely on or definitely off. There must be intermediate positions, but for most purposes we can forget about them. As an example of a discrete-state machine we might consider a wheel which clicks round through 120° once a second, but may be stopped by a
145
+
146
+ lever which can be operated from outside; in addition a lamp is to light in one of the positions of the wheel. This machine could be described abstractly as follows. The internal state of the machine (which is described by the position of the wheel) may be q1, q2 or q3. There is an input signal i0 or i1 (position of lever). The internal state at any moment is determined by the last state and input signal according to the table (TABLE DELETED)
147
+
148
+
149
+
150
+ The output signals, the only externally visible indication of the internal state (the light) are described by the table
151
+
152
+ State q1 q2 q3
153
+
154
+ output o0 o0 o1
155
+
156
+ This example is typical of discrete-state machines. They can be described by such tables provided they have only a finite number of possible states.
157
+
158
+ It will seem that given the initial state of the machine and the input signals it is always possible to predict all future states. This is reminiscent of Laplace's view that from the complete state of the universe at one moment of time, as described by the positions and velocities of all particles, it should be possible to predict all future states. The prediction which we are considering is, however, rather nearer to practicability than that considered by Laplace. The system of the "universe as a whole" is such that quite small errors in the initial conditions can have an overwhelming effect at a later time. The displacement of a single electron by a billionth of a centimetre at one moment might make the difference between a man being killed by an avalanche a year later, or escaping. It is an essential property of the mechanical systems which we have called "discrete-state machines" that this phenomenon does not occur. Even when we consider the actual physical machines instead of the idealised machines, reasonably accurate knowledge of the state at one moment yields reasonably accurate knowledge any number of steps later.
159
+
160
+ As we have mentioned, digital computers fall within the class of discrete-state machines.
161
+
162
+ But the number of states of which such a machine is capable is usually enormously large.
163
+
164
+ For instance, the number for the machine now working at Manchester is about 2^165,000, i.e., about 10^50,000. Compare this with our example of the clicking wheel described above, which had three states. It is not difficult to see why the number of states should be so immense. The computer includes a store corresponding to the paper used by a human computer. It must be possible to write into the store any one of the combinations of symbols which might have been written on the paper. For simplicity suppose that only digits from 0 to 9 are used as symbols. Variations in handwriting are ignored. Suppose the computer is allowed 100 sheets of paper each containing 50 lines each with room for 30 digits. Then the number of states is 10^(100x50x30), i.e., 10^150,000. This is about the number of states of three Manchester machines put together. The logarithm to the base two of the number of states is usually called the "storage capacity" of the machine. Thus the Manchester machine has a storage capacity of about 165,000 and the wheel machine of our example about 1.6. If two machines are put together their capacities must be added to obtain the capacity of the resultant machine. This leads to the possibility of statements such as "The Manchester machine contains 64 magnetic tracks each with a capacity of 2560, eight electronic tubes with a capacity of 1280. Miscellaneous storage amounts to about 300 making a total of 174,380."
165
+
166
+ Given the table corresponding to a discrete-state machine it is possible to predict what it will do. There is no reason why this calculation should not be carried out by means of a digital computer. Provided it could be carried out sufficiently quickly the digital computer could mimic the behavior of any discrete-state machine. The imitation game could then be played with the machine in question (as B) and the mimicking digital
167
+
168
+ computer (as A) and the interrogator would be unable to distinguish them. Of course the digital computer must have an adequate storage capacity as well as working sufficiently fast. Moreover, it must be programmed afresh for each new machine which it is desired to mimic.
169
+
170
+ This special property of digital computers, that they can mimic any discrete-state machine, is described by saying that they are universal machines. The existence of machines with this property has the important consequence that, considerations of speed apart, it is unnecessary to design various new machines to do various computing processes. They can all be done with one digital computer, suitably programmed for each case. It will be seen that as a consequence of this all digital computers are in a sense equivalent.
171
+
172
+ We may now consider again the point raised at the end of §3. It was suggested tentatively that the question, "Can machines think?" should be replaced by "Are there imaginable digital computers which would do well in the imitation game?" If we wish we can make this superficially more general and ask "Are there discrete-state machines which would do well?" But in view of the universality property we see that either of these questions is equivalent to this, "Let us fix our attention on one particular digital computer C. Is it true that by modifying this computer to have an adequate storage, suitably increasing its speed of action, and providing it with an appropriate programme, C can be made to play satisfactorily the part of A in the imitation game, the part of B being taken by a man?"
173
+
174
+ 6. Contrary Views on the Main Question
175
+
176
+ We may now consider the ground to have been cleared and we are ready to proceed to the debate on our question, "Can machines think?" and the variant of it quoted at the end of the last section. We cannot altogether abandon the original form of the problem, for opinions will differ as to the appropriateness of the substitution and we must at least listen to what has to be said in this connexion.
177
+
178
+ It will simplify matters for the reader if I explain first my own beliefs in the matter.
179
+
180
+ Consider first the more accurate form of the question. I believe that in about fifty years'
181
+
182
+ time it will be possible to programme computers, with a storage capacity of about $10^9$, to make them play the imitation game so well that an average interrogator will not have more than 70 per cent chance of making the right identification after five minutes of questioning. The original question, "Can machines think?" I believe to be too meaningless to deserve discussion. Nevertheless I believe that at the end of the century the use of words and general educated opinion will have altered so much that one will be able to speak of machines thinking without expecting to be contradicted. I believe further that no useful purpose is served by concealing these beliefs. The popular view that scientists proceed inexorably from well-established fact to well-established fact, never being influenced by any unproved conjecture, is quite mistaken. Provided it is made clear which are proved facts and which are conjectures, no harm can result. Conjectures are of great importance since they suggest useful lines of research.
183
+
184
+ I now proceed to consider opinions opposed to my own.
185
+
186
+ (1) The Theological Objection
187
+
188
+ Thinking is a function of man's immortal soul. God has given an immortal soul to every man and woman, but not to any other animal or to machines. Hence no animal or machine can think.
189
+
190
+ I am unable to accept any part of this, but will attempt to reply in theological terms. I should find the argument more convincing if animals were classed with men, for there is a greater difference, to my mind, between the typical animate and the inanimate than there is between man and the other animals. The arbitrary character of the orthodox view becomes clearer if we consider how it might appear to a member of some other religious community. How do Christians regard the Moslem view that women have no souls? But let us leave this point aside and return to the main argument. It appears to me that the argument quoted above implies a serious restriction of the omnipotence of the Almighty.
191
+
192
+ It is admitted that there are certain things that He cannot do such as making one equal to two, but should we not believe that He has freedom to confer a soul on an elephant if He sees fit? We might expect that He would only exercise this power in conjunction with a mutation which provided the elephant with an appropriately improved brain to minister to the needs of this soul. An argument of exactly similar form may be made for the case of machines. It may seem different because it is more difficult to "swallow." But this really only means that we think it would be less likely that He would consider the circumstances suitable for conferring a soul. The circumstances in question are discussed in the rest of this paper. In attempting to construct such machines we should not be irreverently usurping His power of creating souls, any more than we are in the procreation of children: rather we are, in either case, instruments of His will providing
193
+
194
+ mansions for the souls that He creates.
195
+
196
+ However, this is mere speculation. I am not very impressed with theological arguments whatever they may be used to support. Such arguments have often been found unsatisfactory in the past. In the time of Galileo it was argued that the texts, "And the sun stood still . . . and hasted not to go down about a whole day" (Joshua x. 13) and "He laid the foundations of the earth, that it should not move at any time" (Psalm cv. 5) were an adequate refutation of the Copernican theory. With our present knowledge such an argument appears futile. When that knowledge was not available it made a quite different impression.
197
+
198
+ (2) The "Heads in the Sand" Objection
199
+
200
+ "The consequences of machines thinking would be too dreadful. Let us hope and believe that they cannot do so."
201
+
202
+ This argument is seldom expressed quite so openly as in the form above. But it affects most of us who think about it at all. We like to believe that Man is in some subtle way superior to the rest of creation. It is best if he can be shown to be necessarily superior, for
203
+
204
+ then there is no danger of him losing his commanding position. The popularity of the theological argument is clearly connected with this feeling. It is likely to be quite strong in intellectual people, since they value the power of thinking more highly than others, and are more inclined to base their belief in the superiority of Man on this power.
205
+
206
+ I do not think that this argument is sufficiently substantial to require refutation.
207
+
208
+ Consolation would be more appropriate: perhaps this should be sought in the transmigration of souls.
209
+
210
+ (3) The Mathematical Objection
211
+
212
+ There are a number of results of mathematical logic which can be used to show that there are limitations to the powers of discrete-state machines. The best known of these results is known as Godel's theorem (1931) and shows that in any sufficiently powerful logical system statements can be formulated which can neither be proved nor disproved within the system, unless possibly the system itself is inconsistent. There are other, in some respects similar, results due to Church (1936), Kleene (1935), Rosser, and Turing (1937).
213
+
214
+ The latter result is the most convenient to consider, since it refers directly to machines, whereas the others can only be used in a comparatively indirect argument: for instance if Godel's theorem is to be used we need in addition to have some means of describing logical systems in terms of machines, and machines in terms of logical systems. The result in question refers to a type of machine which is essentially a digital computer with an infinite capacity. It states that there are certain things that such a machine cannot do. If it is rigged up to give answers to questions as in the imitation game, there will be some questions to which it will either give a wrong answer, or fail to give an answer at all however much time is allowed for a reply. There may, of course, be many such questions, and questions which cannot be answered by one machine may be satisfactorily answered by another. We are of course supposing for the present that the questions are of the kind to which an answer "Yes" or "No" is appropriate, rather than questions such as "What do you think of Picasso?" The questions that we know the machines must fail on are of this type, "Consider the machine specified as follows. . . . Will this machine ever answer 'Yes'
215
+
216
+ to any question?" The dots are to be replaced by a description of some machine in a standard form, which could be something like that used in §5. When the machine described bears a certain comparatively simple relation to the machine which is under interrogation, it can be shown that the answer is either wrong or not forthcoming. This is the mathematical result: it is argued that it proves a disability of machines to which the human intellect is not subject.
217
+
218
+ The short answer to this argument is that although it is established that there are limitations to the powers of any particular machine, it has only been stated, without any sort of proof, that no such limitations apply to the human intellect. But I do not think this view can be dismissed quite so lightly. Whenever one of these machines is asked the appropriate critical question, and gives a definite answer, we know that this answer must be wrong, and this gives us a certain feeling of superiority. Is this feeling illusory? It is no doubt quite genuine, but I do not think too much importance should be attached to it. We too often give wrong answers to questions ourselves to be justified in being very pleased
219
+
220
+ at such evidence of fallibility on the part of the machines. Further, our superiority can only be felt on such an occasion in relation to the one machine over which we have scored our petty triumph. There would be no question of triumphing simultaneously over all machines. In short, then, there might be men cleverer than any given machine, but then again there might be other machines cleverer again, and so on.
221
+
222
+ Those who hold to the mathematical argument would, I think, mostly be willing to accept the imitation game as a basis for discussion. Those who believe in the two previous objections would probably not be interested in any criteria.
223
+
224
+ (4) The Argument from Consciousness
225
+
226
+ This argument is very well expressed in Professor Jefferson's Lister Oration for 1949, from which I quote. "Not until a machine can write a sonnet or compose a concerto because of thoughts and emotions felt, and not by the chance fall of symbols, could we agree that machine equals brain—that is, not only write it but know that it had written it.
227
+
228
+ No mechanism could feel (and not merely artificially signal, an easy contrivance) pleasure at its successes, grief when its valves fuse, be warmed by flattery, be made miserable by its mistakes, be charmed by sex, be angry or depressed when it cannot get what it wants."
229
+
230
+ This argument appears to be a denial of the validity of our test. According to the most extreme form of this view the only way by which one could be sure that machine thinks is to be the machine and to feel oneself thinking. One could then describe these feelings to the world, but of course no one would be justified in taking any notice. Likewise according to this view the only way to know that a man thinks is to be that particular man. It is in fact the solipsist point of view. It may be the most logical view to hold but it makes communication of ideas difficult. A is liable to believe "A thinks but B does not"
231
+
232
+ whilst B believes "B thinks but A does not." Instead of arguing continually over this point it is usual to have the polite convention that everyone thinks.
233
+
234
+ I am sure that Professor Jefferson does not wish to adopt the extreme and solipsist point of view. Probably he would be quite willing to accept the imitation game as a test. The game (with the player B omitted) is frequently used in practice under the name of viva voce to discover whether some one really understands something or has "learnt it parrot fashion." Let us listen in to a part of such a viva voce: Interrogator: In the first line of your sonnet which reads "Shall I compare thee to a summer's day," would not "a spring day" do as well or better?
235
+
236
+ Witness: It wouldn't scan.
237
+
238
+ Interrogator: How about "a winter's day," That would scan all right.
239
+
240
+ Witness: Yes, but nobody wants to be compared to a winter's day.
241
+
242
+ Interrogator: Would you say Mr. Pickwick reminded you of Christmas?
243
+
244
+ Witness: In a way.
245
+
246
+ Interrogator: Yet Christmas is a winter's day, and I do not think Mr. Pickwick would mind the comparison.
247
+
248
+ Witness: I don't think you're serious. By a winter's day one means a typical winter's day, rather than a special one like Christmas.
249
+
250
+ And so on. What would Professor Jefferson say if the sonnet-writing machine was able to answer like this in the viva voce? I do not know whether he would regard the machine as
251
+
252
+ "merely artificially signalling" these answers, but if the answers were as satisfactory and sustained as in the above passage I do not think he would describe it as "an easy contrivance." This phrase is, I think, intended to cover such devices as the inclusion in the machine of a record of someone reading a sonnet, with appropriate switching to turn it on from time to time.
253
+
254
+ In short then, I think that most of those who support the argument from consciousness could be persuaded to abandon it rather than be forced into the solipsist position. They will then probably be willing to accept our test.
255
+
256
+ I do not wish to give the impression that I think there is no mystery about consciousness.
257
+
258
+ There is, for instance, something of a paradox connected with any attempt to localise it.
259
+
260
+ But I do not think these mysteries necessarily need to be solved before we can answer the question with which we are concerned in this paper.
261
+
262
+ (5) Arguments from Various Disabilities
263
+
264
+ These arguments take the form, "I grant you that you can make machines do all the things you have mentioned but you will never be able to make one to do X." Numerous features X are suggested in this connexion. I offer a selection:
265
+
266
+ Be kind, resourceful, beautiful, friendly, have initiative, have a sense of humour, tell right from wrong, make mistakes, fall in love, enjoy strawberries and cream, make some one fall in love with it, learn from experience, use words properly, be the subject of its own thought, have as much diversity of behaviour as a man, do something really new.
267
+
268
+ No support is usually offered for these statements. I believe they are mostly founded on the principle of scientific induction. A man has seen thousands of machines in his lifetime. From what he sees of them he draws a number of general conclusions. They are ugly, each is designed for a very limited purpose, when required for a minutely different purpose they are useless, the variety of behaviour of any one of them is very small, etc., etc. Naturally he concludes that these are necessary properties of machines in general.
269
+
270
+ Many of these limitations are associated with the very small storage capacity of most machines. (I am assuming that the idea of storage capacity is extended in some way to
271
+
272
+ cover machines other than discrete-state machines. The exact definition does not matter as no mathematical accuracy is claimed in the present discussion.) A few years ago, when very little had been heard of digital computers, it was possible to elicit much incredulity concerning them, if one mentioned their properties without describing their construction.
273
+
274
+ That was presumably due to a similar application of the principle of scientific induction.
275
+
276
+ These applications of the principle are of course largely unconscious. When a burnt child fears the fire and shows that he fears it by avoiding it, I should say that he was applying scientific induction. (I could of course also describe his behaviour in many other ways.) The works and customs of mankind do not seem to be very suitable material to which to apply scientific induction. A very large part of space-time must be investigated, if reliable results are to be obtained. Otherwise we may (as most English children do) decide that everybody speaks English, and that it is silly to learn French.
277
+
278
+ There are, however, special remarks to be made about many of the disabilities that have been mentioned. The inability to enjoy strawberries and cream may have struck the reader as frivolous. Possibly a machine might be made to enjoy this delicious dish, but any attempt to make one do so would be idiotic. What is important about this disability is that it contributes to some of the other disabilities, e.g., to the difficulty of the same kind of friendliness occurring between man and machine as between white man and white man, or between black man and black man.
279
+
280
+ The claim that "machines cannot make mistakes" seems a curious one. One is tempted to retort, "Are they any the worse for that?" But let us adopt a more sympathetic attitude, and try to see what is really meant. I think this criticism can be explained in terms of the imitation game. It is claimed that the interrogator could distinguish the machine from the man simply by setting them a number of problems in arithmetic. The machine would be unmasked because of its deadly accuracy. The reply to this is simple. The machine (programmed for playing the game) would not attempt to give the right answers to the arithmetic problems. It would deliberately introduce mistakes in a manner calculated to confuse the interrogator. A mechanical fault would probably show itself through an unsuitable decision as to what sort of a mistake to make in the arithmetic. Even this interpretation of the criticism is not sufficiently sympathetic. But we cannot afford the space to go into it much further. It seems to me that this criticism depends on a confusion between two kinds of mistake. We may call them "errors of functioning" and "errors of conclusion." Errors of functioning are due to some mechanical or electrical fault which causes the machine to behave otherwise than it was designed to do. In philosophical discussions one likes to ignore the possibility of such errors; one is therefore discussing
281
+
282
+ "abstract machines." These abstract machines are mathematical fictions rather than physical objects. By definition they are incapable of errors of functioning. In this sense we can truly say that "machines can never make mistakes." Errors of conclusion can only arise when some meaning is attached to the output signals from the machine. The machine might, for instance, type out mathematical equations, or sentences in English.
283
+
284
+ When a false proposition is typed we say that the machine has committed an error of conclusion. There is clearly no reason at all for saying that a machine cannot make this kind of mistake. It might do nothing but type out repeatedly "0 = 1." To take a less
285
+
286
+ perverse example, it might have some method for drawing conclusions by scientific induction. We must expect such a method to lead occasionally to erroneous results.
287
+
288
+ The claim that a machine cannot be the subject of its own thought can of course only be answered if it can be shown that the machine has some thought with some subject matter.
289
+
290
+ Nevertheless, "the subject matter of a machine's operations" does seem to mean something, at least to the people who deal with it. If, for instance, the machine was trying to find a solution of the equation $x^2 - 40x - 11 = 0$ one would be tempted to describe this equation as part of the machine's subject matter at that moment. In this sort of sense a machine undoubtedly can be its own subject matter. It may be used to help in making up its own programmes, or to predict the effect of alterations in its own structure. By observing the results of its own behaviour it can modify its own programmes so as to achieve some purpose more effectively. These are possibilities of the near future, rather than Utopian dreams.
291
+
292
+ The criticism that a machine cannot have much diversity of behaviour is just a way of saying that it cannot have much storage capacity. Until fairly recently a storage capacity of even a thousand digits was very rare.
293
+
294
+ The criticisms that we are considering here are often disguised forms of the argument from consciousness. Usually if one maintains that a machine can do one of these things, and describes the kind of method that the machine could use, one will not make much of an impression. It is thought that the method (whatever it may be, for it must be mechanical) is really rather base. Compare the parentheses in Jefferson's statement quoted on page 22.
295
+
296
+ (6) Lady Lovelace's Objection
297
+
298
+ Our most detailed information of Babbage's Analytical Engine comes from a memoir by Lady Lovelace (1842). In it she states, "The Analytical Engine has no pretensions to originate anything. It can do whatever we know how to order it to perform" (her italics).
299
+
300
+ This statement is quoted by Hartree (1949) who adds: "This does not imply that it may not be possible to construct electronic equipment which will 'think for itself,' or in which, in biological terms, one could set up a conditioned reflex, which would serve as a basis for 'learning.' Whether this is possible in principle or not is a stimulating and exciting question, suggested by some of these recent developments. But it did not seem that the machines constructed or projected at the time had this property."
301
+
302
+ I am in thorough agreement with Hartree over this. It will be noticed that he does not assert that the machines in question had not got the property, but rather that the evidence available to Lady Lovelace did not encourage her to believe that they had it. It is quite possible that the machines in question had in a sense got this property. For suppose that some discrete-state machine has the property. The Analytical Engine was a universal digital computer, so that, if its storage capacity and speed were adequate, it could by suitable programming be made to mimic the machine in question. Probably this argument
303
+
304
+ did not occur to the Countess or to Babbage. In any case there was no obligation on them to claim all that could be claimed.
305
+
306
+ This whole question will be considered again under the heading of learning machines.
307
+
308
+ A variant of Lady Lovelace's objection states that a machine can "never do anything really new." This may be parried for a moment with the saw, "There is nothing new under the sun." Who can be certain that "original work" that he has done was not simply the growth of the seed planted in him by teaching, or the effect of following well-known general principles. A better variant of the objection says that a machine can never "take us by surprise." This statement is a more direct challenge and can be met directly.
309
+
310
+ Machines take me by surprise with great frequency. This is largely because I do not do sufficient calculation to decide what to expect them to do, or rather because, although I do a calculation, I do it in a hurried, slipshod fashion, taking risks. Perhaps I say to myself, "I suppose the Voltage here ought to be the same as there: anyway let's assume it is." Naturally I am often wrong, and the result is a surprise for me for by the time the experiment is done these assumptions have been forgotten. These admissions lay me open to lectures on the subject of my vicious ways, but do not throw any doubt on my credibility when I testify to the surprises I experience.
311
+
312
+ I do not expect this reply to silence my critic. He will probably say that such surprises are due to some creative mental act on my part, and reflect no credit on the machine. This leads us back to the argument from consciousness, and far from the idea of surprise. It is a line of argument we must consider closed, but it is perhaps worth remarking that the appreciation of something as surprising requires as much of a "creative mental act"
313
+
314
+ whether the surprising event originates from a man, a book, a machine or anything else.
315
+
316
+ The view that machines cannot give rise to surprises is due, I believe, to a fallacy to which philosophers and mathematicians are particularly subject. This is the assumption that as soon as a fact is presented to a mind all consequences of that fact spring into the mind simultaneously with it. It is a very useful assumption under many circumstances, but one too easily forgets that it is false. A natural consequence of doing so is that one then assumes that there is no virtue in the mere working out of consequences from data and general principles.
317
+
318
+ (7) Argument from Continuity in the Nervous System
319
+
320
+ The nervous system is certainly not a discrete-state machine. A small error in the information about the size of a nervous impulse impinging on a neuron, may make a large difference to the size of the outgoing impulse. It may be argued that, this being so, one cannot expect to be able to mimic the behaviour of the nervous system with a discrete-state system.
321
+
322
+ It is true that a discrete-state machine must be different from a continuous machine. But if we adhere to the conditions of the imitation game, the interrogator will not be able to take any advantage of this difference. The situation can be made clearer if we consider some
323
+
324
+ other simpler continuous machine. A differential analyser will do very well. (A differential analyser is a certain kind of machine not of the discrete-state type used for some kinds of calculation.) Some of these provide their answers in a typed form, and so are suitable for taking part in the game. It would not be possible for a digital computer to predict exactly what answers the differential analyser would give to a problem, but it would be quite capable of giving the right sort of answer. For instance, if asked to give the value of $\pi$ (actually about 3.1416) it would be reasonable to choose at random between the values 3.12, 3.13, 3.14, 3.15, 3.16 with the probabilities of 0.05, 0.15, 0.55, 0.19, 0.06
325
+
326
+ (say). Under these circumstances it would be very difficult for the interrogator to distinguish the differential analyser from the digital computer.
327
+
328
+ (8) The Argument from Informality of Behaviour
329
+
330
+ It is not possible to produce a set of rules purporting to describe what a man should do in every conceivable set of circumstances. One might for instance have a rule that one is to stop when one sees a red traffic light, and to go if one sees a green one, but what if by some fault both appear together? One may perhaps decide that it is safest to stop. But some further difficulty may well arise from this decision later. To attempt to provide rules of conduct to cover every eventuality, even those arising from traffic lights, appears to be impossible. With all this I agree.
331
+
332
+ From this it is argued that we cannot be machines. I shall try to reproduce the argument, but I fear I shall hardly do it justice. It seems to run something like this. "If each man had a definite set of rules of conduct by which he regulated his life he would be no better than a machine. But there are no such rules, so men cannot be machines." The undistributed middle is glaring. I do not think the argument is ever put quite like this, but I believe this is the argument used nevertheless. There may however be a certain confusion between
333
+
334
+ "rules of conduct" and "laws of behaviour" to cloud the issue. By "rules of conduct" I mean precepts such as "Stop if you see red lights," on which one can act, and of which one can be conscious. By "laws of behaviour" I mean laws of nature as applied to a man's body such as "if you pinch him he will squeak." If we substitute "laws of behaviour which regulate his life" for "laws of conduct by which he regulates his life" in the argument quoted the undistributed middle is no longer insuperable. For we believe that it is not only true that being regulated by laws of behaviour implies being some sort of machine (though not necessarily a discrete-state machine), but that conversely being such a machine implies being regulated by such laws. However, we cannot so easily convince ourselves of the absence of complete laws of behaviour as of complete rules of conduct.
335
+
336
+ The only way we know of for finding such laws is scientific observation, and we certainly know of no circumstances under which we could say, "We have searched enough. There are no such laws."
337
+
338
+ We can demonstrate more forcibly that any such statement would be unjustified. For suppose we could be sure of finding such laws if they existed. Then given a discrete-state machine it should certainly be possible to discover by observation sufficient about it to predict its future behaviour, and this within a reasonable time, say a thousand years. But this does not seem to be the case. I have set up on the Manchester computer a small
339
+
340
+ programme using only 1,000 units of storage, whereby the machine supplied with one sixteen-figure number replies with another within two seconds. I would defy anyone to learn from these replies sufficient about the programme to be able to predict any replies to untried values.
341
+
342
+ (9) The Argument from Extrasensory Perception
343
+
344
+ I assume that the reader is familiar with the idea of extrasensory perception, and the meaning of the four items of it, viz., telepathy, clairvoyance, precognition and psychokinesis. These disturbing phenomena seem to deny all our usual scientific ideas.
345
+
346
+ How we should like to discredit them! Unfortunately the statistical evidence, at least for telepathy, is overwhelming. It is very difficult to rearrange one's ideas so as to fit these new facts in. Once one has accepted them it does not seem a very big step to believe in ghosts and bogies. The idea that our bodies move simply according to the known laws of physics, together with some others not yet discovered but somewhat similar, would be one of the first to go.
347
+
348
+ This argument is to my mind quite a strong one. One can say in reply that many scientific theories seem to remain workable in practice, in spite of clashing with ESP; that in fact one can get along very nicely if one forgets about it. This is rather cold comfort, and one fears that thinking is just the kind of phenomenon where ESP may be especially relevant.
349
+
350
+ A more specific argument based on ESP might run as follows: "Let us play the imitation game, using as witnesses a man who is good as a telepathic receiver, and a digital computer. The interrogator can ask such questions as 'What suit does the card in my right hand belong to?' The man by telepathy or clairvoyance gives the right answer 130 times out of 400 cards. The machine can only guess at random, and perhaps gets 104 right, so the interrogator makes the right identification." There is an interesting possibility which opens here. Suppose the digital computer contains a random number generator. Then it will be natural to use this to decide what answer to give. But then the random number generator will be subject to the psychokinetic powers of the interrogator. Perhaps this psychokinesis might cause the machine to guess right more often than would be expected on a probability calculation, so that the interrogator might still be unable to make the right identification. On the other hand, he might be able to guess right without any questioning, by clairvoyance. With ESP anything may happen.
351
+
352
+ If telepathy is admitted it will be necessary to tighten our test up. The situation could be regarded as analogous to that which would occur if the interrogator were talking to himself and one of the competitors was listening with his ear to the wall. To put the competitors into a "telepathy-proof room" would satisfy all requirements.
353
+
354
+
355
+
356
+
357
+
358
+ 7. Learning Machines
359
+
360
+
361
+ The reader will have anticipated that I have no very convincing arguments of a positive nature to support my views. If I had I should not have taken such pains to point out the fallacies in contrary views. Such evidence as I have I shall now give.
362
+
363
+ Let us return for a moment to Lady Lovelace's objection, which stated that the machine can only do what we tell it to do. One could say that a man can "inject" an idea into the machine, and that it will respond to a certain extent and then drop into quiescence, like a piano string struck by a hammer. Another simile would be an atomic pile of less than critical size: an injected idea is to correspond to a neutron entering the pile from without.
364
+
365
+ Each such neutron will cause a certain disturbance which eventually dies away. If, however, the size of the pile is sufficiently increased, the disturbance caused by such an incoming neutron will very likely go on and on increasing until the whole pile is destroyed. Is there a corresponding phenomenon for minds, and is there one for machines? There does seem to be one for the human mind. The majority of them seem to be "subcritical," i.e., to correspond in this analogy to piles of subcritical size. An idea presented to such a mind will on average give rise to less than one idea in reply. A smallish proportion are supercritical. An idea presented to such a mind may give rise to a whole "theory" consisting of secondary, tertiary and more remote ideas. Animals' minds seem to be very definitely subcritical. Adhering to this analogy we ask, "Can a machine be made to be supercritical?"
366
+
367
+ The "skin-of-an-onion" analogy is also helpful. In considering the functions of the mind or the brain we find certain operations which we can explain in purely mechanical terms.
368
+
369
+ This we say does not correspond to the real mind: it is a sort of skin which we must strip off if we are to find the real mind. But then in what remains we find a further skin to be stripped off, and so on. Proceeding in this way do we ever come to the "real" mind, or do we eventually come to the skin which has nothing in it? In the latter case the whole mind is mechanical. (It would not be a discrete-state machine however. We have discussed this.)
370
+
371
+ These last two paragraphs do not claim to be convincing arguments. They should rather be described as "recitations tending to produce belief."
372
+
373
+ The only really satisfactory support that can be given for the view expressed at the beginning of §6, will be that provided by waiting for the end of the century and then doing the experiment described. But what can we say in the meantime? What steps should be taken now if the experiment is to be successful?
374
+
375
+ As I have explained, the problem is mainly one of programming. Advances in engineering will have to be made too, but it seems unlikely that these will not be adequate for the requirements. Estimates of the storage capacity of the brain vary from $10^{10}$ to $10^{15}$ binary digits. I incline to the lower values and believe that only a very small fraction is used for the higher types of thinking. Most of it is probably used for the retention of visual impressions. I should be surprised if more than $10^{9}$ was required for satisfactory playing of the imitation game, at any rate against a blind man. (Note: The capacity of the Encyclopaedia Britannica, 11th edition, is $2 \times 10^{9}$.) A storage capacity of $10^{7}$ would be a very practicable possibility even by present techniques. It is probably not necessary to increase the speed of operations of the machines at all. Parts of modern machines which can be regarded as analogs of nerve cells work about a thousand times faster than the latter. This should provide a "margin of safety" which could cover losses
376
+
377
+ of speed arising in many ways. Our problem then is to find out how to programme these machines to play the game. At my present rate of working I produce about a thousand digits of programme a day, so that about sixty workers, working steadily through the fifty years might accomplish the job, if nothing went into the wastepaper basket. Some more expeditious method seems desirable.
378
+
379
+ In the process of trying to imitate an adult human mind we are bound to think a good deal about the process which has brought it to the state that it is in. We may notice three components.
380
+
381
+ (a) The initial state of the mind, say at birth,
382
+
383
+ (b) The education to which it has been subjected,
384
+
385
+ (c) Other experience, not to be described as education, to which it has been subjected.
386
+
387
+ Instead of trying to produce a programme to simulate the adult mind, why not rather try to produce one which simulates the child's? If this were then subjected to an appropriate course of education one would obtain the adult brain. Presumably the child brain is something like a notebook as one buys it from the stationer's. Rather little mechanism, and lots of blank sheets. (Mechanism and writing are from our point of view almost synonymous.) Our hope is that there is so little mechanism in the child brain that something like it can be easily programmed. The amount of work in the education we can assume, as a first approximation, to be much the same as for the human child.
388
+
389
+ We have thus divided our problem into two parts. The child programme and the education process. These two remain very closely connected. We cannot expect to find a good child machine at the first attempt. One must experiment with teaching one such machine and see how well it learns. One can then try another and see if it is better or worse. There is an obvious connection between this process and evolution, by the identifications
390
+
391
+ Structure of the child machine = hereditary material
392
+
393
+ Changes of the child machine = mutation,
394
+
395
+ Natural selection = judgment of the experimenter
396
+
397
+ One may hope, however, that this process will be more expeditious than evolution. The survival of the fittest is a slow method for measuring advantages. The experimenter, by the exercise of intelligence, should be able to speed it up. Equally important is the fact that he is not restricted to random mutations. If he can trace a cause for some weakness he can probably think of the kind of mutation which will improve it.
398
+
399
+ It will not be possible to apply exactly the same teaching process to the machine as to a normal child. It will not, for instance, be provided with legs, so that it could not be asked
400
+
401
+ to go out and fill the coal scuttle. Possibly it might not have eyes. But however well these deficiencies might be overcome by clever engineering, one could not send the creature to school without the other children making excessive fun of it. It must be given some tuition. We need not be too concerned about the legs, eyes, etc. The example of Miss Helen Keller shows that education can take place provided that communication in both directions between teacher and pupil can take place by some means or other.
402
+
403
+ We normally associate punishments and rewards with the teaching process. Some simple child machines can be constructed or programmed on this sort of principle. The machine has to be so constructed that events which shortly preceded the occurrence of a punishment signal are unlikely to be repeated, whereas a reward signal increased the probability of repetition of the events which led up to it. These definitions do not presuppose any feelings on the part of the machine. I have done some experiments with one such child machine, and succeeded in teaching it a few things, but the teaching method was too unorthodox for the experiment to be considered really successful.
404
+
405
+ The use of punishments and rewards can at best be a part of the teaching process.
406
+
407
+ Roughly speaking, if the teacher has no other means of communicating to the pupil, the amount of information which can reach him does not exceed the total number of rewards and punishments applied. By the time a child has learnt to repeat "Casabianca" he would probably feel very sore indeed, if the text could only be discovered by a "Twenty Questions" technique, every "NO" taking the form of a blow. It is necessary therefore to have some other "unemotional" channels of communication. If these are available it is possible to teach a machine by punishments and rewards to obey orders given in some language, e.g., a symbolic language. These orders are to be transmitted through the
408
+
409
+ "unemotional" channels. The use of this language will diminish greatly the number of punishments and rewards required.
410
+
411
+ Opinions may vary as to the complexity which is suitable in the child machine. One might try to make it as simple as possible consistently with the general principles.
412
+
413
+ Alternatively one might have a complete system of logical inference "built in." In the latter case the store would be largely occupied with definitions and propositions. The propositions would have various kinds of status, e.g., well-established facts, conjectures, mathematically proved theorems, statements given by an authority, expressions having the logical form of proposition but not belief-value. Certain propositions may be described as "imperatives." The machine should be so constructed that as soon as an imperative is classed as "well established" the appropriate action automatically takes place. To illustrate this, suppose the teacher says to the machine, "Do your homework now." This may cause "Teacher says 'Do your homework now' " to be included amongst the well-established facts. Another such fact might be, "Everything that teacher says is true." Combining these may eventually lead to the imperative, "Do your homework now,"
414
+
415
+ being included amongst the well-established facts, and this, by the construction of the machine, will mean that the homework actually gets started, but the effect is very satisfactory. The processes of inference used by the machine need not be such as would satisfy the most exacting logicians. There might for instance be no hierarchy of types. But this need not mean that type fallacies will occur, any more than we are bound to fall over
416
+
417
+ unfenced cliffs. Suitable imperatives (expressed within the systems, not forming part of the rules of the system) such as "Do not use a class unless it is a subclass of one which has been mentioned by teacher" can have a similar effect to "Do not go too near the edge."
418
+
419
+ The imperatives that can be obeyed by a machine that has no limbs are bound to be of a rather intellectual character, as in the example (doing homework) given above. Important amongst such imperatives will be ones which regulate the order in which the rules of the logical system concerned are to be applied. For at each stage when one is using a logical system, there is a very large number of alternative steps, any of which one is permitted to apply, so far as obedience to the rules of the logical system is concerned. These choices make the difference between a brilliant and a footling reasoner, not the difference between a sound and a fallacious one. Propositions leading to imperatives of this kind might be "When Socrates is mentioned, use the syllogism in Barbara" or "If one method has been proved to be quicker than another, do not use the slower method." Some of these may be "given by authority," but others may be produced by the machine itself, e.g.
420
+
421
+ by scientific induction.
422
+
423
+ The idea of a learning machine may appear paradoxical to some readers. How can the rules of operation of the machine change? They should describe completely how the machine will react whatever its history might be, whatever changes it might undergo. The rules are thus quite time-invariant. This is quite true. The explanation of the paradox is that the rules which get changed in the learning process are of a rather less pretentious kind, claiming only an ephemeral validity. The reader may draw a parallel with the Constitution of the United States.
424
+
425
+ An important feature of a learning machine is that its teacher will often be very largely ignorant of quite what is going on inside, although he may still be able to some extent to predict his pupil's behavior. This should apply most strongly to the later education of a machine arising from a child machine of well-tried design (or programme). This is in clear contrast with normal procedure when using a machine to do computations: one's object is then to have a clear mental picture of the state of the machine at each moment in the computation. This object can only be achieved with a struggle. The view that "the machine can only do what we know how to order it to do," appears strange in face of this. Most of the programmes which we can put into the machine will result in its doing something that we cannot make sense of at all, or which we regard as completely random behaviour. Intelligent behaviour presumably consists in a departure from the completely disciplined behaviour involved in computation, but a rather slight one, which does not give rise to random behaviour, or to pointless repetitive loops. Another important result of preparing our machine for its part in the imitation game by a process of teaching and learning is that "human fallibility" is likely to be omitted in a rather natural way, i.e., without special "coaching." (The reader should reconcile this with the point of view on pages 23 and 24.) Processes that are learnt do not produce a hundred per cent certainty of result; if they did they could not be unlearnt.
426
+
427
+ It is probably wise to include a random element in a learning machine. A random element is rather useful when we are searching for a solution of some problem. Suppose for instance we wanted to find a number between 50 and 200 which was equal to the square of the sum of its digits, we might start at 51 then try 52 and go on until we got a number that worked. Alternatively we might choose numbers at random until we got a good one.
428
+
429
+ This method has the advantage that it is unnecessary to keep track of the values that have been tried, but the disadvantage that one may try the same one twice, but this is not very important if there are several solutions. The systematic method has the disadvantage that there may be an enormous block without any solutions in the region which has to be investigated first. Now the learning process may be regarded as a search for a form of behaviour which will satisfy the teacher (or some other criterion). Since there is probably a very large number of satisfactory solutions the random method seems to be better than the systematic. It should be noticed that it is used in the analogous process of evolution.
430
+
431
+ But there the systematic method is not possible. How could one keep track of the different genetical combinations that had been tried, so as to avoid trying them again?
432
+
433
+ We may hope that machines will eventually compete with men in all purely intellectual fields. But which are the best ones to start with? Even this is a difficult decision. Many people think that a very abstract activity, like the playing of chess, would be best. It can also be maintained that it is best to provide the machine with the best sense organs that money can buy, and then teach it to understand and speak English. This process could follow the normal teaching of a child. Things would be pointed out and named, etc.
434
+
435
+ Again I do not know what the right answer is, but I think both approaches should be tried.
436
+
437
+ We can only see a short distance ahead, but we can see plenty there that needs to be done.
438
+
439
+
440
+
441
+
442
+
examples/HFblog-An Introduction to Q-Learning Part 1.txt ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ created: 2022-05-23T01:23:41 (UTC +02:00)
3
+ tags: []
4
+ source: https://huggingface.co/blog/deep-rl-q-part1
5
+ author: ThomasSimonini
6
+ Thomas Simonini
7
+ ---
8
+
9
+ # An Introduction to Q-Learning Part 1
10
+
11
+ > ## Excerpt
12
+ > We’re on a journey to advance and democratize artificial intelligence through open source and open science.
13
+
14
+ ---
15
+ Back to blog
16
+
17
+ ## Unit 2, part 1 of the Deep Reinforcement Learning Class with Hugging Face 🤗
18
+
19
+ ## _This article is part of the Deep Reinforcement Learning Class. A free course from beginner to expert. Check the syllabusΒ here._
20
+
21
+ In the first chapter of this class, we learned about Reinforcement Learning (RL), the RL process, and the different methods to solve an RL problem. We also trained our first lander agent to **land correctly on the Moon 🌕 and uploaded it to the Hugging Face Hub.**
22
+
23
+ So today, we're going toΒ **dive deeper into one of the Reinforcement Learning methods: value-based methods**Β and study our first RL algorithm:Β **Q-Learning.**
24
+
25
+ We'll alsoΒ **implement our first RL agent from scratch**: a Q-Learning agent and will train it in two environments:
26
+
27
+ 1. Frozen-Lake-v1 (non-slippery version): where our agent will need toΒ **go from the starting state (S) to the goal state (G)**Β by walking only on frozen tiles (F) and avoiding holes (H).
28
+ 2. An autonomous taxi will needΒ **to learn to navigate**Β a city toΒ **transport its passengers from point A to point B.**
29
+
30
+ This unit is divided into 2 parts:
31
+
32
+ In the first part, we'llΒ **learn about the value-based methods and the difference between Monte Carlo and Temporal Difference Learning.**
33
+
34
+ And in the second part,Β **we'll study our first RL algorithm: Q-Learning, and implement our first RL Agent.**
35
+
36
+ This unit is fundamental **if you want to be able to work on Deep Q-Learning** (unit 3): the first Deep RL algorithm that was able to play Atari games and **beat the human level on some of them** (breakout, space invaders…).
37
+
38
+ So let's get started!
39
+
40
+ - What is RL? A short recap
41
+ - The two types of value-based methods
42
+ - The State-Value function
43
+ - The Action-Value function
44
+ - The Bellman Equation: simplify our value estimation
45
+ - Monte Carlo vs Temporal Difference Learning
46
+ - Monte Carlo: learning at the end of the episode
47
+ - Temporal Difference Learning: learning at each step
48
+
49
+ ## **What is RL? A short recap**
50
+
51
+ In RL, we build an agent that canΒ **make smart decisions**. For instance, an agent thatΒ **learns to play a video game.**Β Or a trading agent thatΒ **learns to maximize its benefits**Β by making smart decisions onΒ **what stocks to buy and when to sell.**
52
+
53
+ But, to make intelligent decisions, our agent will learn from the environment byΒ **interacting with it through trial and error**Β and receiving rewards (positive or negative)Β **as unique feedback.**
54
+
55
+ Its goalΒ **is to maximize its expected cumulative reward**Β (because of the reward hypothesis).
56
+
57
+ **The agent's decision-making process is called the policy π:** given a state, a policy will output an action or a probability distribution over actions. That is, given an observation of the environment, a policy will provide an action (or multiple probabilities for each action) that the agent should take.
58
+
59
+ **Our goal is to find an optimal policy π**\*, a.k.a. a policy that leads to the best expected cumulative reward.
60
+
61
+ And to find this optimal policy (hence solving the RL problem), thereΒ **are two main types of RL methods**:
62
+
63
+ - _Policy-based methods_:Β **Train the policy directly**Β to learn which action to take given a state.
64
+ - _Value-based methods_:Β **Train a value function**Β to learnΒ **which state is more valuable**Β and use this value functionΒ **to take the action that leads to it.**
65
+
66
+ And in this chapter,Β **we'll dive deeper into the Value-based methods.**
67
+
68
+ ## **The two types of value-based methods**
69
+
70
+ In value-based methods,Β **we learn a value function**Β thatΒ **maps a state to the expected value of being at that state.**
71
+
72
+ The value of a state is theΒ **expected discounted return**Β the agent can get if itΒ **starts at that state and then acts according to our policy.**
73
+
74
+ If you forgot what discounting is, you can read this section.
75
+
76
+ > But what does it mean to act according to our policy? We don't have a policy in value-based methods since we train a value function and not a policy?
77
+
78
+ Remember that the goal of an **RL agent is to have an optimal policy π.**
79
+
80
+ To find it, we learned that there are two different methods:
81
+
82
+ - _Policy-based methods:_Β **Directly train the policy**Β to select what action to take given a state (or a probability distribution over actions at that state). In this case, weΒ **don't have a value function.**
83
+
84
+ The policy takes a state as input and outputs what action to take at that state (deterministic policy).
85
+
86
+ And consequently,Β **we don't define by hand the behavior of our policy; it's the training that will define it.**
87
+
88
+ - _Value-based methods:_Β **Indirectly, by training a value function**Β that outputs the value of a state or a state-action pair. Given this value function, our policyΒ **will take action.**
89
+
90
+ But, because we didn't train our policy,Β **we need to specify its behavior.**Β For instance, if we want a policy that, given the value function, will take actions that always lead to the biggest reward,Β **we'll create a Greedy Policy.**
91
+
92
+ Given a state, our action-value function (that we train) outputs the value of each action at that state, then our greedy policy (that we defined) selects the action with the biggest state-action pair value.
93
+
94
+ Consequently, whatever method you use to solve your problem,Β **you will have a policy**, but in the case of value-based methods you don't train it, your policyΒ **is just a simple function that you specify**Β (for instance greedy policy) and this policyΒ **uses the values given by the value-function to select its actions.**
95
+
96
+ So the difference is:
97
+
98
+ - In policy-based,Β **the optimal policy is found by training the policy directly.**
99
+ - In value-based,Β **finding an optimal value function leads to having an optimal policy.**
100
+
101
+ In fact, most of the time, in value-based methods, you'll useΒ **an Epsilon-Greedy Policy**Β that handles the exploration/exploitation trade-off; we'll talk about it when we talk about Q-Learning in the second part of this unit.
102
+
103
+ So, we have two types of value-based functions:
104
+
105
+ ### **The State-Value function**
106
+
107
+ We write the state value function under a policy π like this:
108
+
109
+ For each state, the state-value function outputs the expected return if the agent **starts at that state,** and then follows the policy forever after (for all future timesteps if you prefer).
110
+
111
+ If we take the state with value -7: it's the expected return starting at that state and taking actions according to our policy (greedy policy), so right, right, right, down, down, right, right.
112
+
113
+ ### **The Action-Value function**
114
+
115
+ In the Action-value function, for each state and action pair, the action-value functionΒ **outputs the expected return**Β if the agent starts in that state and takes action, and then follows the policy forever after.
116
+
117
+ The value of taking action a in state s under a policy π is:
118
+
119
+ We see that the difference is:
120
+
121
+ - In state-value function, we calculateΒ **the value of a state (St).**
122
+ - In action-value function, we calculateΒ **the value of the state-action pair (St, At) hence the value of taking that action at that state.**
123
+
124
+ Note: We didn't fill all the state-action pairs for the example of Action-value function
125
+
126
+ In either case, whatever value function we choose (state-value or action-value function),Β **the value is the expected return.**
127
+
128
+ However, the problem is that it implies thatΒ **to calculate EACH value of a state or a state-action pair, we need to sum all the rewards an agent can get if it starts at that state.**
129
+
130
+ This can be a tedious process, and that'sΒ **where the Bellman equation comes to help us.**
131
+
132
+ ## **The Bellman Equation: simplify our value estimation**
133
+
134
+ The Bellman equationΒ **simplifies our state value or state-action value calculation.**
135
+
136
+ With what we have learned so far, we know that if we calculate the V(St) (value of a state), we need to calculate the return starting at that state and then follow the policy forever after. **(Our policy that we defined in the following example is a Greedy Policy, and for simplification, we don't discount the reward).**
137
+
138
+ So to calculate V(St), we need to make the sum of the expected rewards. Hence:
139
+
140
+ To calculate the value of State 1: the sum of rewards **if the agent started in that state** and then followed the **greedy policy (taking actions that lead to the best state values) for all the time steps.**
141
+
142
+ Then, to calculate the V(St+1), we need to calculate the return starting at that state St+1.
143
+
144
+ To calculate the value of State 2: the sum of rewards **if the agent started in that state,** and then followed the **policy for all the time steps.**
145
+
146
+ So you see, that's a pretty tedious process if you need to do it for each state value or state-action value.
147
+
148
+ Instead of calculating the expected return for each state or each state-action pair,Β **we can use the Bellman equation.**
149
+
150
+ The Bellman equation is a recursive equation that works like this: instead of starting for each state from the beginning and calculating the return, we can consider the value of any state as:
151
+
152
+ **The immediate reward (Rt+1) + the discounted value of the state that follows (gamma \* V(St+1)).**
153
+
154
+ For simplification here we don’t discount so gamma = 1.
155
+
156
+ If we go back to our example, the value of State 1= expected cumulative return if we start at that state.
157
+
158
+ To calculate the value of State 1: the sum of rewardsΒ **if the agent started in that state 1**Β and then followed theΒ **policy for all the time steps.**
159
+
160
+ Which is equivalent to V(St) = Immediate reward (Rt+1) + Discounted value of the next state (Gamma \* V(St+1))
161
+
162
+ For simplification, here we don't discount, so gamma = 1.
163
+
164
+ - The value of V(St+1) = Immediate reward (Rt+2) + Discounted value of the St+2 (Gamma \* V(St+2)).
165
+ - And so on.
166
+
167
+ To recap, the idea of the Bellman equation is that instead of calculating each value as the sum of the expected return, **which is a long process,** we calculate it as the equivalent **sum of the immediate reward + the discounted value of the state that follows.**
168
+
169
+ ## **Monte Carlo vs Temporal Difference Learning**
170
+
171
+ The last thing we need to talk about before diving into Q-Learning is the two ways of learning.
172
+
173
+ Remember that an RL agent **learns by interacting with its environment.** The idea is that **using the experience taken**, given the reward it gets, the agent will **update its value or policy.**
174
+
175
+ Monte Carlo and Temporal Difference Learning are two differentΒ **strategies on how to train our value function or our policy function.**Β Both of themΒ **use experience to solve the RL problem.**
176
+
177
+ On one hand, Monte Carlo usesΒ **an entire episode of experience before learning.**Β On the other hand, Temporal Difference usesΒ **only a step (St, At, Rt+1, St+1) to learn.**
178
+
179
+ We'll explain both of themΒ **using a value-based method example.**
180
+
181
+ ### **Monte Carlo: learning at the end of the episode**
182
+
183
+ Monte Carlo waits until the end of the episode, calculates Gt (return) and uses it asΒ **a target for updating V(St).**
184
+
185
+ So it requires aΒ **complete entire episode of interaction before updating our value function.**
186
+
187
+ If we take an example:
188
+
189
+ - We always start the episodeΒ **at the same starting point.**
190
+
191
+ - **The agent takes actions using the policy**. For instance, using an Epsilon Greedy Strategy, a policy that alternates between exploration (random actions) and exploitation.
192
+
193
+ - We getΒ **the reward and the next state.**
194
+
195
+ - We terminate the episode if the cat eats the mouse or if the mouse moves > 10 steps.
196
+
197
+ - At the end of the episode,Β **we have a list of State, Actions, Rewards, and Next States**
198
+
199
+ - **The agent will sum the total rewards Gt**Β (to see how well it did).
200
+
201
+ - It will thenΒ **update V(st) based on the formula**
202
+
203
+
204
+ - ThenΒ **start a new game with this new knowledge**
205
+
206
+ By running more and more episodes,Β **the agent will learn to play better and better.**
207
+
208
+ For instance, if we train a state-value function using Monte Carlo:
209
+
210
+ - We just started to train our Value function,Β **so it returns 0 value for each state**
211
+ - Our learning rate (lr) is 0.1 and our discount rate is 1 (= no discount)
212
+ - Our mouseΒ **explores the environment and takes random actions**
213
+
214
+ - The mouse made more than 10 steps, so the episode ends.
215
+
216
+ - We have a list of state, action, rewards, next\_state,Β **we need to calculate the return Gt**
217
+ - $$G\_t = R\_{t+1} + R\_{t+2} + R\_{t+3} ...$$
218
+ - Gt = Rt+1 + Rt+2 + Rt+3… (for simplicity we don’t discount the rewards).
219
+ - Gt = 1 + 0 + 0 + 0+ 0 + 0 + 1 + 1+ 0 + 0
220
+ - Gt= 3
221
+ - We can now update V(S0):
222
+
223
+ - New V(S0) = V(S0) + lr \* \[Gt - V(S0)\]
224
+ - New V(S0) = 0 + 0.1 \* \[3 - 0\]
225
+ - The new V(S0) = 0.3
226
+
227
+ ### **Temporal Difference Learning: learning at each step**
228
+
229
+ - **Temporal difference, on the other hand, waits for only one interaction (one step) St+1**
230
+ - to form a TD target and update V(St) using Rt+1 and gamma \* V(St+1).
231
+
232
+ The idea withΒ **TD is to update the V(St) at each step.**
233
+
234
+ But because we didn't play during an entire episode, we don't have Gt (expected return). Instead, **we estimate Gt by adding Rt+1 and the discounted value of the next state.**
235
+
236
+ We speak aboutΒ **bootstrap because TD bases its update part on an existing estimate V(St+1) and not a complete sample Gt.**
237
+
238
+ This method is called TD(0) orΒ **one-step TD (update the value function after any individual step).**
239
+
240
+ If we take the same example,
241
+
242
+ - We just started to train our Value function, so it returns 0 value for each state.
243
+ - Our learning rate (lr) is 0.1, and our discount rate is 1 (no discount).
244
+ - Our mouse explores the environment and takes a random action: **going to the left**
245
+ - It gets a reward Rt+1 = 1 sinceΒ **it eats a piece of cheese**
246
+
247
+ We can now update V(S0):
248
+
249
+ New V(S0) = V(S0) + lr \* \[R1 + gamma \* V(S1) - V(S0)\]
250
+
251
+ New V(S0) = 0 + 0.1 \* \[1 + 1 \* 0 - 0\]
252
+
253
+ The new V(S0) = 0.1
254
+
255
+ So we just updated our value function for State 0.
256
+
257
+ Now weΒ **continue to interact with this environment with our updated value function.**
258
+
259
+ If we summarize:
260
+
261
+ - With Monte Carlo, we update the value function from a complete episode, and so weΒ **use the actual accurate discounted return of this episode.**
262
+ - With TD learning, we update the value function from a step, so we replace Gt that we don't have withΒ **an estimated return called TD target.**
263
+
264
+ So now, before diving into Q-Learning, let's summarise what we just learned:
265
+
266
+ We have two types of value-based functions:
267
+
268
+ - State-Value function: outputs the expected return if **the agent starts at a given state and acts according to the policy forever after.**
269
+ - Action-Value function: outputs the expected return if **the agent starts in a given state, takes a given action at that state** and then acts according to the policy forever after.
270
+ - In value-based methods,Β **we define the policy by hand**Β because we don't train it, we train a value function. The idea is that if we have an optimal value function, weΒ **will have an optimal policy.**
271
+
272
+ There are two types of methods to learn a policy or a value function:
273
+
274
+ - WithΒ _the Monte Carlo method_, we update the value function from a complete episode, and so weΒ **use the actual accurate discounted return of this episode.**
275
+ - WithΒ _the TD Learning method,_Β we update the value function from a step, so we replace Gt that we don't have withΒ **an estimated return called TD target.**
276
+
277
+ ---
278
+
279
+ So that’s all for today. Congrats on finishing this first part of the chapter! There was a lot of information.
280
+
281
+ **That’s normal if you still feel confused with all these elements**. This was the same for me and for all people who studied RL.
282
+
283
+ **Take time to really grasp the material before continuing**. In the second part (that we will publish this Friday 📆), we’ll study our first RL algorithm: Q-Learning, and implement our first RL Agent in two environments:
284
+
285
+ 1. Frozen-Lake-v1 (non-slippery version): where our agent will need toΒ **go from the starting state (S) to the goal state (G)**Β by walking only on frozen tiles (F) and avoiding holes (H).
286
+ 2. An autonomous taxi will needΒ **to learn to navigate**Β a city toΒ **transport its passengers from point A to point B.**
287
+
288
+ And don't forget to share with your friends who want to learn 🤗!
289
+
290
+ ### Keep learning, stay awesome,
examples/HFblog-Introducing Decision Transformers.txt ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ created: 2022-05-23T01:23:57 (UTC +02:00)
3
+ tags: []
4
+ source: https://huggingface.co/blog/decision-transformers
5
+ author:
6
+ ---
7
+
8
+ # Introducing Decision Transformers on Hugging Face 🤗
9
+
10
+ > ## Excerpt
11
+ > We’re on a journey to advance and democratize artificial intelligence through open source and open science.
12
+
13
+ ---
14
+ Back to blog
15
+
16
+ At Hugging Face, we are contributing to the ecosystem for Deep Reinforcement Learning researchers and enthusiasts. Recently, we have integrated Deep RL frameworks such as Stable-Baselines3.
17
+
18
+ And today we are happy to announce that we integrated the Decision Transformer, an Offline Reinforcement Learning method, into the 🤗 transformers library and the Hugging Face Hub. We have some exciting plans for improving accessibility in the field of Deep RL and we are looking forward to sharing them with you over the coming weeks and months.
19
+
20
+ - What is Offline Reinforcement Learning?
21
+ - Introducing Decision Transformers
22
+ - Using the Decision Transformer in 🤗 Transformers
23
+ - Conclusion
24
+ - What's next?
25
+ - References
26
+
27
+ ## What is Offline Reinforcement Learning?
28
+
29
+ Deep Reinforcement Learning (RL) is a framework to build decision-making agents. These agents aim to learn optimal behavior (policy) by interacting with the environment through trial and error and receiving rewards as unique feedback.
30
+
31
+ The agent’s goal is to maximize **its cumulative reward, called return.** This is because RL is based on the reward hypothesis: **all goals can be described as the maximization of the expected cumulative reward.**
32
+
33
+ Deep Reinforcement Learning agents **learn with batches of experience.** The question is: how do they collect it?
34
+
35
+ _A comparison between Reinforcement Learning in an Online and Offline setting, figure taken from this post_
36
+
37
+ In online reinforcement learning, **the agent gathers data directly**: it collects a batch of experience by interacting with the environment. Then, it uses this experience immediately (or via some replay buffer) to learn from it (update its policy).
38
+
39
+ But this implies that either you train your agent directly in the real world or have a simulator. If you don’t have one, you need to build it, which can be very complex (how to reflect the complex reality of the real world in an environment?), expensive, and insecure since if the simulator has flaws, the agent will exploit them if they provide a competitive advantage.
40
+
41
+ On the other hand, in offline reinforcement learning, the agent only uses data collected from other agents or human demonstrations. **It does not interact with the environment**.
42
+
43
+ The process is as follows:
44
+
45
+ 1. Create a dataset using one or more policies and/or human interactions.
46
+ 2. Run offline RL on this dataset to learn a policy
47
+
48
+ This method has one drawback: the counterfactual queries problem. What do we do if our agent decides to do something for which we don’t have the data? For instance, turning right at an intersection when we don’t have this trajectory.
49
+
50
+ Some solutions to this problem already exist, but if you want to know more about offline reinforcement learning, you can watch this video.
51
+
52
+ ## Introducing Decision Transformers
53
+
54
+ The Decision Transformer model was introduced in “Decision Transformer: Reinforcement Learning via Sequence Modeling” by Chen L. et al. It abstracts Reinforcement Learning as a **conditional-sequence modeling problem**.
55
+
56
+ The main idea is that instead of training a policy using RL methods, such as fitting a value function, that will tell us what action to take to maximize the return (cumulative reward), we use a sequence modeling algorithm (Transformer) that, given a desired return, past states, and actions, will generate future actions to achieve this desired return. It’s an autoregressive model conditioned on the desired return, past states, and actions to generate future actions that achieve the desired return.
57
+
58
+ This is a complete shift in the Reinforcement Learning paradigm since we use generative trajectory modeling (modeling the joint distribution of the sequence of states, actions, and rewards) to replace conventional RL algorithms. It means that in Decision Transformers, we don’t maximize the return but rather generate a series of future actions that achieve the desired return.
59
+
60
+ The process goes this way:
61
+
62
+ 1. We feed the last K timesteps into the Decision Transformer with 3 inputs:
63
+ - Return-to-go
64
+ - State
65
+ - Action
66
+ 2. The tokens are embedded either with a linear layer if the state is a vector, or with a CNN encoder if it consists of frames.
67
+ 3. The inputs are processed by a GPT-2 model which predicts future actions via autoregressive modeling.
68
+
69
+ _Decision Transformer architecture. States, actions, and returns are fed into modality specific linear embeddings and a positional episodic timestep encoding is added. Tokens are fed into a GPT architecture which predicts actions autoregressively using a causal self-attention mask. Figure from \[1\]._
70
+
71
+ ## Using the Decision Transformer in 🤗 Transformers
72
+
73
+ The Decision Transformer model is now available as part of the 🤗 transformers library. In addition, we share nine pre-trained model checkpoints for continuous control tasks in the Gym environment.
74
+
75
+ _An “expert” Decision Transformers model, learned using offline RL in the Gym Walker2d environment._
76
+
77
+ ### Install the package
78
+
79
+ ```
80
+ pip install git+https://github.com/huggingface/transformers
81
+ ```
82
+
83
+ ### Loading the model
84
+
85
+ Using the Decision Transformer is relatively easy, but as it is an autoregressive model, some care has to be taken in order to prepare the model’s inputs at each time-step. We have prepared both a Python script and a Colab notebook that demonstrate how to use this model.
86
+
87
+ Loading a pretrained Decision Transformer is simple in the 🤗 transformers library:
88
+
89
+ ```
90
+ from transformers import DecisionTransformerModel
91
+
92
+ model_name = "edbeeching/decision-transformer-gym-hopper-expert"
93
+ model = DecisionTransformerModel.from_pretrained(model_name)
94
+ ```
95
+
96
+ ### Creating the environment
97
+
98
+ We provide pretrained checkpoints for the Gym Hopper, Walker2D and Halfcheetah. Checkpoints for Atari environments will soon be available.
99
+
100
+ ```
101
+ import gym
102
+ env = gym.make("Hopper-v3")
103
+ state_dim = env.observation_space.shape[0] # state size
104
+ act_dim = env.action_space.shape[0] # action size
105
+ ```
106
+
107
+ ### Autoregressive prediction function
108
+
109
+ The model performs an autoregressive prediction; that is to say that predictions made at the current time-step **t** are sequentially conditioned on the outputs from previous time-steps. This function is quite meaty, so we will aim to explain it in the comments.
110
+
111
+ ```
112
+ # Function that gets an action from the model using autoregressive prediction
113
+ # with a window of the previous 20 timesteps.
114
+ def get_action(model, states, actions, rewards, returns_to_go, timesteps):
115
+ # This implementation does not condition on past rewards
116
+
117
+ states = states.reshape(1, -1, model.config.state_dim)
118
+ actions = actions.reshape(1, -1, model.config.act_dim)
119
+ returns_to_go = returns_to_go.reshape(1, -1, 1)
120
+ timesteps = timesteps.reshape(1, -1)
121
+
122
+ # The prediction is conditioned on up to 20 previous time-steps
123
+ states = states[:, -model.config.max_length :]
124
+ actions = actions[:, -model.config.max_length :]
125
+ returns_to_go = returns_to_go[:, -model.config.max_length :]
126
+ timesteps = timesteps[:, -model.config.max_length :]
127
+
128
+ # pad all tokens to sequence length, this is required if we process batches
129
+ padding = model.config.max_length - states.shape[1]
130
+ attention_mask = torch.cat([torch.zeros(padding), torch.ones(states.shape[1])])
131
+ attention_mask = attention_mask.to(dtype=torch.long).reshape(1, -1)
132
+ states = torch.cat([torch.zeros((1, padding, state_dim)), states], dim=1).float()
133
+ actions = torch.cat([torch.zeros((1, padding, act_dim)), actions], dim=1).float()
134
+ returns_to_go = torch.cat([torch.zeros((1, padding, 1)), returns_to_go], dim=1).float()
135
+ timesteps = torch.cat([torch.zeros((1, padding), dtype=torch.long), timesteps], dim=1)
136
+
137
+ # perform the prediction
138
+ state_preds, action_preds, return_preds = model(
139
+ states=states,
140
+ actions=actions,
141
+ rewards=rewards,
142
+ returns_to_go=returns_to_go,
143
+ timesteps=timesteps,
144
+ attention_mask=attention_mask,
145
+ return_dict=False,)
146
+ return action_preds[0, -1]
147
+ ```
148
+
149
+ ### Evaluating the model
150
+
151
+ In order to evaluate the model, we need some additional information: the mean and standard deviation of the states that were used during training. Fortunately, these are available in each checkpoint’s model card on the Hugging Face Hub!
152
+
153
+ We also need a target return for the model. This is the power of Offline Reinforcement Learning: we can use the target return to control the performance of the policy. This could be really powerful in a multiplayer setting, where we would like to adjust the performance of an opponent bot to be at a suitable difficulty for the player. The authors show a great plot of this in their paper!
154
+
155
+ _Sampled (evaluation) returns accumulated by Decision Transformer when conditioned on the specified target (desired) returns. Top: Atari. Bottom: D4RL medium-replay datasets. Figure from \[1\]._
156
+
157
+ ```
158
+ TARGET_RETURN = 3.6 # This was normalized during training
159
+ MAX_EPISODE_LENGTH = 1000
160
+
161
+ state_mean = np.array(
162
+ [1.3490015, -0.11208222, -0.5506444, -0.13188992, -0.00378754, 2.6071432,
163
+ 0.02322114, -0.01626922, -0.06840388, -0.05183131, 0.04272673,])
164
+
165
+ state_std = np.array(
166
+ [0.15980862, 0.0446214, 0.14307782, 0.17629202, 0.5912333, 0.5899924,
167
+ 1.5405099, 0.8152689, 2.0173461, 2.4107876, 5.8440027,])
168
+
169
+ state_mean = torch.from_numpy(state_mean)
170
+ state_std = torch.from_numpy(state_std)
171
+
172
+ state = env.reset()
173
+ target_return = torch.tensor(TARGET_RETURN).float().reshape(1, 1)
174
+ states = torch.from_numpy(state).reshape(1, state_dim).float()
175
+ actions = torch.zeros((0, act_dim)).float()
176
+ rewards = torch.zeros(0).float()
177
+ timesteps = torch.tensor(0).reshape(1, 1).long()
178
+
179
+ # take steps in the environment
180
+ for t in range(max_ep_len):
181
+ # add zeros for actions as input for the current time-step
182
+ actions = torch.cat([actions, torch.zeros((1, act_dim))], dim=0)
183
+ rewards = torch.cat([rewards, torch.zeros(1)])
184
+
185
+ # predicting the action to take
186
+ action = get_action(model,
187
+ (states - state_mean) / state_std,
188
+ actions,
189
+ rewards,
190
+ target_return,
191
+ timesteps)
192
+ actions[-1] = action
193
+ action = action.detach().numpy()
194
+
195
+ # interact with the environment based on this action
196
+ state, reward, done, _ = env.step(action)
197
+
198
+ cur_state = torch.from_numpy(state).reshape(1, state_dim)
199
+ states = torch.cat([states, cur_state], dim=0)
200
+ rewards[-1] = reward
201
+
202
+ pred_return = target_return[0, -1] - (reward / scale)
203
+ target_return = torch.cat([target_return, pred_return.reshape(1, 1)], dim=1)
204
+ timesteps = torch.cat([timesteps, torch.ones((1, 1)).long() * (t + 1)], dim=1)
205
+
206
+ if done:
207
+ break
208
+ ```
209
+
210
+ You will find a more detailed example, with the creation of videos of the agent in our Colab notebook.
211
+
212
+ ## Conclusion
213
+
214
+ In addition to Decision Transformers, we want to support more use cases and tools from the Deep Reinforcement Learning community. Therefore, it would be great to hear your feedback on the Decision Transformer model, and more generally anything we can build with you that would be useful for RL. Feel free to **reach out to us**.
215
+
216
+ ## What’s next?
217
+
218
+ In the coming weeks and months, we plan on supporting other tools from the ecosystem:
219
+
220
+ - Integrating **RL-baselines3-zoo**
221
+ - Uploading **RL-trained-agents models** into the Hub: a big collection of pre-trained Reinforcement Learning agents using stable-baselines3
222
+ - Integrating other Deep Reinforcement Learning libraries
223
+ - Implementing Convolutional Decision Transformers For Atari
224
+ - And more to come 🥳
225
+
226
+ The best way to keep in touch is to **join our discord server** to exchange with us and with the community.
227
+
228
+ ## References
229
+
230
+ \[1\] Chen, Lili, et al. "Decision transformer: Reinforcement learning via sequence modeling." _Advances in neural information processing systems_ 34 (2021).
231
+
232
+ \[2\] Agarwal, Rishabh, Dale Schuurmans, and Mohammad Norouzi. "An optimistic perspective on offline reinforcement learning." _International Conference on Machine Learning_. PMLR, 2020.
233
+
234
+ ### Acknowledgements
235
+
236
+ We would like to thank the paper’s first authors, Kevin Lu and Lili Chen, for their constructive conversations.
examples/HFblog-Introducing Hugging Face for Education.txt ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ created: 2022-05-23T01:23:51 (UTC +02:00)
3
+ tags: []
4
+ source: https://huggingface.co/blog/education
5
+ author: Violette
6
+ Violette Lepercq
7
+ ---
8
+
9
+ # Introducing Hugging Face for Education 🤗
10
+
11
+ > ## Excerpt
12
+ > We’re on a journey to advance and democratize artificial intelligence through open source and open science.
13
+
14
+ ---
15
+ Back to blog
16
+
17
+ Given that machine learning will make up the overwhelming majority of software development and that non-technical people will be exposed to AI systems more and more, one of the main challenges of AI is adapting and enhancing employee skills. It is also becoming necessary to support teaching staff in proactively taking AI's ethical and critical issues into account.
18
+
19
+ As an open-source company democratizing machine learning, Hugging Face believes it is essential to educate people from all backgrounds worldwide.
20
+
21
+ We launched the ML demo.cratization tour in March 2022, where experts from Hugging Face taught hands-on classes on Building Machine Learning Collaboratively to more than 1000 students from 16 countries. Our new goal: **to teach machine learning to 5 million people by the end of 2023**.
22
+
23
+ _This blog post provides a high-level description of how we will reach our goals around education._
24
+
25
+ ## 🤗 **Education for All**
26
+
27
+ 🗣️ Our goal is to make the potential and limitations of machine learning understandable to everyone. We believe that doing so will help evolve the field in a direction where the application of these technologies will lead to net benefits for society as a whole.
28
+
29
+ Some examples of our existing efforts:
30
+
31
+ - we describe in a very accessible way different uses of ML models (summarization, text generation, object detection…),
32
+ - we allow everyone to try out models directly in their browser through widgets in the model pages, hence lowering the need for technical skills to do so (example),
33
+ - we document and warn about harmful biases identified in systems (like GPT-2).
34
+ - we provide tools to create open-source ML apps that allow anyone to understand the potential of ML in one click.
35
+
36
+ ## 🤗 **Education for Beginners**
37
+
38
+ 🗣️ We want to lower the barrier to becoming a machine learning engineer by providing online courses, hands-on workshops, and other innovative techniques.
39
+
40
+ - We provide a free course about natural language processing (NLP) and more domains (soon) using free tools and libraries from the Hugging Face ecosystem. It’s completely free and without ads. The ultimate goal of this course is to learn how to apply Transformers to (almost) any machine learning problem!
41
+ - We provide a free course about Deep Reinforcement Learning. In this course, you can study Deep Reinforcement Learning in theory and practice, learn to use famous Deep RL libraries, train agents in unique environments, publish your trained agents in one line of code to the Hugging Face Hub, and more!
42
+ - We provide a free course on how to build interactive demos for your machine learning models. The ultimate goal of this course is to allow ML developers to easily present their work to a wide audience including non-technical teams or customers, researchers to more easily reproduce machine learning models and behavior, end users to more easily identify and debug failure points of models, and more!
43
+ - Experts at Hugging Face wrote a book on Transformers and their applications to a wide range of NLP tasks.
44
+
45
+ Apart from those efforts, many team members are involved in other educational efforts such as:
46
+
47
+ - Participating in meetups, conferences and workshops.
48
+ - Creating podcasts, YouTube videos, and blog posts.
49
+ - Organizing events in which free GPUs are provided for anyone to be able to train and share models and create demos for them.
50
+
51
+ ## 🤗 **Education for Instructors**
52
+
53
+ 🗣️ We want to empower educators with tools and offer collaborative spaces where students can build machine learning using open-source technologies and state-of-the-art machine learning models.
54
+
55
+ - We provide educators with free infrastructure and resources to quickly introduce real-world applications of ML to their students and make learning more fun and interesting. By creating a classroom for free from the hub, instructors can turn their classes into collaborative environments where students can learn and build ML-powered applications using free open-source technologies and state-of-the-art models.
56
+
57
+ - We’ve assembled a free toolkit translated into 8 languages that instructors of machine learning or Data Science can use to easily prepare labs, homework, or classes. The content is self-contained so that it can be easily incorporated into an existing curriculum. This content is free and uses well-known Open Source technologies (🤗 transformers, gradio, etc). Feel free to pick a tutorial and teach it!
58
+
59
+ 1️⃣ A Tour through the Hugging Face Hub
60
+
61
+ 2️⃣ Build and Host Machine Learning Demos with Gradio & Hugging Face
62
+
63
+ 3️⃣ Getting Started with Transformers
64
+
65
+ - We're organizing a dedicated, free workshop (June 6) on how to teach our educational resources in your machine learning and data science classes. Do not hesitate to register.
66
+
67
+ - We are currently doing a worldwide tour in collaboration with university instructors to teach more than 10000 students one of our core topics: How to build machine learning collaboratively? You can request someone on the Hugging Face team to run the session for your class via the ML demo.cratization tour initiative.
68
+
69
+
70
+ ## 🤗 **Education Events & News**
71
+
72
+ - **05/13**\[NEWS\]: Are you studying machine learning? Do you want to be a part of our ML democratization efforts and show your campus community how to build ML models with Hugging Face? We want to support you in your journey! You have until June 13th to apply to the 🤗 Student Application Program.
73
+ - **06/06**\[EVENT\]: How to Teach Open-Source Machine Learning Tools. Register
74
+ - **09/08**\[EVENT\]: ML Demo.cratization tour in Argentina at 2pm (GMT-3). Link coming soon
75
+
76
+ 🔥 We are currently working on more content in the course, and more! Stay tuned!
examples/LOTR-S-The Music of the Ainur.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ There was Eru, the One, who in Arda is called Ilúvatar; and he made first the Ainur, the Holy Ones, that were the offspring of his thought, and they were with him before aught else was made. And he spoke to them, propounding to them themes of music; and they sang before him, and he was glad. But for a long while they sang only each alone, or but few together, while the rest hearkened; for each comprehended only that part of the mind of Ilúvatar from which he came, and in the understanding of their brethren they grew but slowly. Yet ever as they listened they came to deeper understanding, and increased in unison and harmony.
2
+
3
+ And it came to pass that Ilúvatar called together all the Ainur and declared to them a mighty theme, unfolding to them things greater and more wonderful than he had yet revealed; and the glory of its beginning and the splendour of its end amazed the Ainur, so that they bowed before Ilúvatar and were silent.
4
+
5
+ Then Ilúvatar said to them: 'Of the theme that I have declared to you, I will now that ye make in harmony together a Great Music. And since I have kindled you with the Flame Imperishable, ye shall show forth your powers in adorning this theme, each with his own thoughts and devices, if he will. But I will sit and hearken, and be glad that through you great beauty has been wakened into song.'
6
+
7
+ Then the voices of the Ainur, like unto harps and lutes, and pipes and trumpets, and viols and organs, and like unto countless choirs singing with words, began to fashion the theme of Ilúvatar to a great music; and a sound arose of endless interchanging melodies woven in harmony that passed beyond hearing into the depths and into the heights, and the places of the dwelling of Ilúvatar were filled to overflowing, and the music and the echo of the music went out into the Void, and it was not void. Never since have the Ainur made any music like to this music, though it has been said that a greater still shall be made before Ilúvatar by the choirs of the Ainur and the Children of Ilúvatar after the end of days. Then the themes of Ilúvatar shall be played aright, and take Being in the moment of their utterance, for all shall then understand fully his intent in their part, and each shall know the comprehension of each, and Ilúvatar shall give to their thoughts the secret fire, being well pleased.
8
+
9
+ But now Ilúvatar sat and hearkened, and for a great while it seemed good to him, for in the music there were no flaws. But as the theme progressed, it came into the heart of Melkor to interweave matters of his own imagining that were not in accord with the theme of Ilúvatar, for he sought therein to increase the power and glory of the part assigned to himself. To Melkor among the Ainur had been given the greatest gifts of power and knowledge, and he had a share in all the gifts of his brethren. He had gone often alone into the void places seeking the Imperishable Flame; for desire grew hot within him to bring into Being things of his own, and it seemed to him that Ilúvatar took no thought for the Void, and he was impatient of its emptiness. Yet he found not the Fire, for it is with Ilúvatar. But being alone he had begun to conceive thoughts of his own unlike those of his brethren.
10
+
11
+ Some of these thoughts he now wove into his music, and straightway discord arose about him, and many that sang nigh him grew despondent, and their thought was disturbed and their music faltered; but some began to attune their music to his rather than to the thought which they had at first. Then the discord of Melkor spread ever wider, and the melodies which had been heard before foundered in a sea of turbulent sound. But Ilúvatar sat and hearkened until it seemed that about his throne there was a raging storm, as of dark waters that made war one upon another in an endless wrath that would not be assuaged.
12
+
13
+ Then Ilúvatar arose, and the Ainur perceived that he smiled; and he lifted up his left hand, and a new theme began amid the storm, like and yet unlike to the former theme, and it gathered power and had new beauty. But the discord of Melkor rose in uproar and contended with it, and again there was a war of sound more violent than before, until many of the Ainur were dismayed and sang no longer, and Melkor had the mastery. Then again Ilúvatar arose, and the Ainur perceived that his countenance was stern; and he lifted up his right hand, and behold! a third theme grew amid the confusion, and it was unlike the others. For it seemed at first soft and sweet, a mere rippling of gentle sounds in delicate melodies; but it could not be quenched, and it took to itself power and profundity. And it seemed at last that there were two musics progressing at one time before the seat of Ilúvatar, and they were utterly at variance. The one was deep and wide and beautiful, but slow and blended with an immeasurable sorrow, from which its beauty chiefly came. The other had now achieved a unity of its own; but it was loud, and vain, and endlessly repeated; and it had little harmony, but rather a clamorous unison as of many trumpets braying upon a few notes. And it essayed to drown the other music by the violence of its voice, but it seemed that its most triumphant notes were taken by the other and woven into its own solemn pattern.
14
+
15
+ In the midst of this strife, whereat the halls of Ilúvatar shook and a tremor ran out into the silences yet unmoved, Ilúvatar arose a third time, and his face was terrible to behold. Then he raised up both his hands, and in one chord, deeper than the Abyss, higher than the Firmament, piercing as the light of the eye of Ilúvatar, the Music ceased.
16
+
17
+ Then Ilúvatar spoke, and he said: 'Mighty are the Ainur, and mightiest among them is Melkor; but that he may know, and all the Ainur, that I am Ilúvatar, those things that ye have sung, I will show them forth, that ye may see what ye have done. And thou, Melkor, shalt see that no theme may be played that hath not its uttermost source in me, nor can any alter the music in my despite. For he that attempteth this shall prove but mine instrument in the devising of things more wonderful, which he himself hath not imagined.'
18
+
19
+ Then the Ainur were afraid, and they did not yet comprehend the words that were said to them; and Melkor was filled with shame, of which came secret anger. But Ilúvatar arose in splendour, and he went forth from the fair regions that he had made for the Ainur; and the Ainur followed him.
20
+
21
+ But when they were come into the Void, Ilúvatar said to them: 'Behold your Music!' And he showed to them a vision, giving to them sight where before was only hearing; and they saw a new World made visible before them, and it was globed amid the Void, and it was sustained therein, but was not of it. And as they looked and wondered this World began to unfold its history, and it seemed to them that it lived and grew. And when the Ainur had gazed for a while and were silent, Ilúvatar said again: 'Behold your Music! This is your minstrelsy; and each of you shall find contained herein, amid the design that I set before you, all those things which it may seem that he himself devised or added. And thou, Melkor, wilt discover all the secret thoughts of thy mind, and wilt perceive that they are but a part of the whole and tributary to its glory.'
22
+
23
+ And many other things Ilúvatar spoke to the Ainur at that time, and because of their memory of his words, and the knowledge that each has of the music that he himself made, the Ainur know much of what was, and is, and is to come, and few things are unseen by them. Yet some things there are that they cannot see, neither alone nor taking counsel together; for to none but himself has Ilúvatar revealed all that he has in store, and in every age there come forth things that are new and have no foretelling, for they do not proceed from the past. And so it was that as this vision of the World was played before them, the Ainur saw that it contained things which they had not thought. And they saw with amazement the coming of the Children of Ilúvatar, and the habitation that was prepared for them; and they perceived that they themselves in the labour of their music had been busy with the preparation of this dwelling, and yet knew not that it had any purpose beyond its own beauty. For the Children of Ilúvatar were conceived by him alone; and they came with the third theme, and were not in the theme which Ilúvatar propounded at the beginning, and none of the Ainur had part in their making. Therefore when they beheld them, the more did they love them, being things other than themselves, strange and free, wherein they saw the mind of Ilúvatar reflected anew, and learned yet a little more of his wisdom, which otherwise had been hidden even from the Ainur.
24
+
25
+ Now the Children of Ilúvatar are Elves and Men, the Firstborn and the Followers. And amid all the splendours of the World, its vast halls and spaces, and its wheeling fires, Ilúvatar chose a place for their habitation in the Deeps of Time and in the midst of the innumerable stars. And this habitation might seem a little thing to those who consider only the majesty of the Ainur, and not their terrible sharpness; as who should take the whole field of Arda for the foundation of a pillar and so raise it until the cone of its summit were more bitter than a needle; or who consider only the immeasurable vastness of the World, which still the Ainur are shaping, and not the minute precision to which they shape all things therein. But when the Ainur had beheld this habitation in a vision and had seen the Children of Ilúvatar arise therein, then many of the most mighty among them bent all their thought and their desire towards that place. And of these Melkor was the chief, even as he was in the beginning the greatest of the Ainur who took part in the Music. And he feigned, even to himself at first, that he desired to go thither and order all things for the good of the Children of Ilúvatar, controlling the turmoils of the heat and the cold that had come to pass through him. But he desired rather to subdue to his will both Elves and Men, envying the gifts with which Ilúvatar promised to endow them; and he wished himself to have subjects and servants, and to be called Lord, and to be a master over other wills.
26
+
27
+ But the other Ainur looked upon this habitation set within the vast spaces of the World, which the Elves call Arda, the Earth; and their hearts rejoiced in light, and their eyes beholding many colours were filled with gladness; but because of the roaring of the sea they felt a great unquiet. And they observed the winds and the air, and the matters of which Arda was made, of iron and stone and silver and gold and many substances: but of all these water they most greatly praised. And it is said by the Eldar that in water there lives yet the echo of the Music of the Ainur more than in any substance else that is in this Earth; and many of the Children of Ilúvatar hearken still unsated to the voices of the Sea, and yet know not for what they listen.
28
+
29
+ Now to water had that Ainu whom the Elves call Ulmo turned his thought, and of all most deeply was he instructed by Ilúvatar in music. But of the airs and winds Manwë most had pondered, who is the noblest of the Ainur. Of the fabric of Earth had Aulë thought, to whom Ilúvatar had given skill and knowledge scarce less than to Melkor; but the delight and pride of Aulë is in the deed of making, and in the thing made, and neither in possession nor in his own mastery; wherefore he gives and hoards not, and is free from care, passing ever on to some new work.
30
+
31
+ And IlΓΊvatar spoke to Ulmo, and said: 'Seest thou not how here in this little realm in the Deeps of Time Melkor hath made war upon thy province? He hath bethought him of bitter cold immoderate, and yet hath not destroyed the beauty of thy fountains, nor of my clear pools. Behold the snow, and the cunning work of frost! Melkor hath devised heats and fire without restraint, and hath not dried up thy desire nor utterly quelled the music of the sea. Behold rather the height and glory of the clouds, and the everchanging mists; and listen to the fall of rain upon the Earth! And in these clouds thou art drawn nearer to ManwΓ«, thy friend, whom thou lovest.'
32
+
33
+ Then Ulmo answered: 'Truly, Water is become now fairer than my heart imagined, neither had my secret thought conceived the snowflake, nor in all my music was contained the falling of the rain. I will seek ManwΓ«, that he and I may make melodies for ever to my delight!' And ManwΓ« and Ulmo have from the beginning been allied, and in all things have served most faithfully the purpose of IlΓΊvatar.
34
+
35
+ But even as Ulmo spoke, and while the Ainur were yet gazing upon this vision, it was taken away and hidden from their sight; and it seemed to them that in that moment they perceived a new thing, Darkness, which they had not known before except in thought. But they had become enamoured of the beauty of the vision and engrossed in the unfolding of the World which came there to being, and their minds were filled with it; for the history was incomplete and the circles of time not full-wrought when the vision was taken away. And some have said that the vision ceased ere the fulfilment of the Dominion of Men and the fading of the Firstborn; wherefore, though the Music is over all, the Valar have not seen as with sight the Later Ages or the ending of the World.
36
+
37
+ Then there was unrest among the Ainur; but Ilúvatar called to them, and said: 'I know the desire of your minds that what ye have seen should verily be, not only in your thought, but even as ye yourselves are, and yet other. Therefore I say: Eä! Let these things Be! And I will send forth into the Void the Flame Imperishable, and it shall be at the heart of the World, and the World shall Be; and those of you that will may go down into it.' And suddenly the Ainur saw afar off a light, as it were a cloud with a living heart of flame; and they knew that this was no vision only, but that Ilúvatar had made a new thing: Eä, the World that Is.
38
+
39
+ Thus it came to pass that of the Ainur some abode still with IlΓΊvatar beyond the confines of the World; but others, and among them many of the greatest and most fair, took the leave of IlΓΊvatar and descended into it. But this condition IlΓΊvatar made, or it is the necessity of their love, that their power should thenceforward be contained and bounded in the World, to be within it for ever, until it is complete, so that they are its life and it is theirs. And therefore they are named the Valar, the Powers of the World.
40
+
41
+ But when the Valar entered into EΓ€ they were at first astounded and at a loss, for it was as if naught was yet made which they had seen in vision, and all was but on point to begin and yet unshaped, and it was dark. For the Great Music had been but the growth and flowering of thought in the Tuneless Halls, and the Vision only a foreshowing; but now they had entered in at the beginning of Time, and the Valar perceived that the World had been but foreshadowed and foresung, and they must achieve it. So began their great labours in wastes unmeasured and unexplored, and in ages uncounted and forgotten, until in the Deeps of Time and in the midst of the vast halls of EΓ€ there came to be that hour and that place where was made the habitation of the Children of IlΓΊvatar. And in this work the chief part was taken by ManwΓ« and AulΓ« and Ulmo; but Melkor too was there from the first, and he meddled in all that was done, turning it if he might to his own desires and purposes; and he kindled great fires. When therefore Earth was yet young and full of flame Melkor coveted it, and he said to the other Valar: 'This shall be my own kingdom; and I name it unto myself!'
42
+
43
+ But Manwë was the brother of Melkor in the mind of Ilúvatar, and he was the chief instrument of the second theme that Ilúvatar had raised up against the discord of Melkor; and he called unto himself many spirits both greater and less, and they came down into the fields of Arda and aided Manwë, lest Melkor should hinder the fulfilment of their labour for ever, and Earth should wither ere it flowered. And Manwë said unto Melkor: 'This kingdom thou shalt not take for thine own, wrongfully, for many others have laboured here no less than thou.' And there was strife between Melkor and the other Valar; and for that time Melkor withdrew and departed to other regions and did there what he would; but he did not put the desire of the Kingdom of Arda from his heart.
44
+
45
+ Now the Valar took to themselves shape and hue; and because they were drawn into the World by love of the Children of IlΓΊvatar, for whom they hoped, they took shape after that manner which they had beheld in the Vision of IlΓΊvatar, save only in majesty and splendour. Moreover their shape comes of their knowledge of the visible World, rather than of the World itself; and they need it not, save only as we use raiment, and yet we may be naked and suffer no loss of our being. Therefore the Valar may walk, if they will, unclad, and then even the Eldar cannot clearly perceive them, though they be present. But when they desire to clothe themselves the Valar take upon them forms some as of male and some as of female; for that difference of temper they had even from their beginning, and it is but bodied forth in the choice of each, not made by the choice, even as with us male and female may be shown by the raiment but is not made thereby. But the shapes wherein the Great Ones array themselves are not at all times like to the shapes of the kings and queens of the Children of IlΓΊvatar; for at times they may clothe themselves in their own thought, made visible in forms of majesty and dread.
46
+
47
+ And the Valar drew unto them many companions, some less, some well nigh as great as themselves, and they laboured together in the ordering of the Earth and the curbing of its tumults. Then Melkor saw what was done, and that the Valar walked on Earth as powers visible, clad in the raiment of the World, and were lovely and glorious to see, and blissful, and that the Earth was becoming as a garden for their delight, for its turmoils were subdued. His envy grew then the greater within him; and he also took visible form, but because of his mood and the malice that burned in him that form was dark and terrible. And he descended upon Arda in power and majesty greater than any other of the Valar, as a mountain that wades in the sea and has its head above the clouds and is clad in ice and crowned with smoke and fire; and the light of the eyes of Melkor was like a flame that withers with heat and pierces with a deadly cold.
48
+
49
+ Thus began the first battle of the Valar with Melkor for the dominion of Arda; and of those tumults the Elves know but little. For what has here been declared is come from the Valar themselves, with whom the EldaliΓ« spoke in the land of Valinor, and by whom they were instructed; but little would the Valar ever tell of the wars before the coming of the Elves. Yet it is told among the Eldar that the Valar endeavoured ever, in despite of Melkor, to rule the Earth and to prepare it for the coming of the Firstborn; and they built lands and Melkor destroyed them; valleys they delved and Melkor raised them up; mountains they carved and Melkor threw them down; seas they hollowed and Melkor spilled them; and naught might have peace or come to lasting growth, for as surely as the Valar began a labour so would Melkor undo it or corrupt it. And yet their labour was not all in vain; and though nowhere and in no work was their will and purpose wholly fulfilled, and all things were in hue and shape other than the Valar had at first intended, slowly nonetheless the Earth was fashioned and made firm. And thus was the habitation of the Children of IlΓΊvatar established at the last in the Deeps of Time and amidst the innumerable stars.
examples/LOTR-S-VALAQUENTA.txt ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Account of the Valar and Maiar according to the lore of the Eldar
2
+ In the beginning Eru, the One, who in the Elvish tongue is named IlΓΊvatar, made the Ainur of his thought; and they made a great Music before him. In this Music the World was begun; for IlΓΊvatar made visible the song of the Ainur, and they beheld it as a light in the darkness. And many among them became enamoured of its beauty, and of its history which they saw beginning and unfolding as in a vision. Therefore IlΓΊvatar gave to their vision Being, and set it amid the Void, and the Secret Fire was sent to burn at the heart of the World; and it was called EΓ€.
3
+
4
+ Then those of the Ainur who desired it arose and entered into the World at the beginning of Time; and it was their task to achieve it, and by their labours to fulfil the vision which they had seen. Long they laboured in the regions of EΓ€, which are vast beyond the thought of Elves and Men, until in the time appointed was made Arda, the Kingdom of Earth. Then they put on the raiment of Earth and descended into it, and dwelt therein.
5
+
6
+ Of the Valar
7
+ The Great among these spirits the Elves name the Valar, the Powers of Arda, and Men have often called them gods. The Lords of the Valar are seven; and the Valier, the Queens of the Valar, are seven also. These were their names in the Elvish tongue as it was spoken in Valinor, though they have other names in the speech of the Elves in Middle-earth, and their names among Men are manifold. The names of the Lords in due order are: ManwΓ«, Ulmo, AulΓ«, OromΓ«, Mandos, LΓ³rien, and Tulkas; and the names of the Queens are: Varda, Yavanna, Nienna, EstΓ«, VairΓ«, VΓ‘na, and Nessa. Melkor is counted no longer among the Valar, and his name is not spoken upon Earth.
8
+
9
+ ManwΓ« and Melkor were brethren in the thought of IlΓΊvatar. The mightiest of those Ainur who came into the World was in his beginning Melkor; but ManwΓ« is dearest to IlΓΊvatar and understands most clearly his purposes. He was appointed to be, in the fullness of time, the first of all Kings: lord of the realm of Arda and ruler of all that dwell therein. In Arda his delight is in the winds and the clouds, and in all the regions of the air, from the heights to the depths, from the utmost borders of the Veil of Arda to the breezes that blow in the grass. SΓΊlimo he is surnamed, Lord of the Breath of Arda. All swift birds, strong of wing, he loves, and they come and go at his bidding.
10
+
11
+ With ManwΓ« dwells Varda, Lady of the Stars, who knows all the regions of EΓ€. Too great is her beauty to be declared in the words of Men or of Elves; for the light of IlΓΊvatar lives still in her face. In light is her power and her joy. Out of the deeps of EΓ€ she came to the aid of ManwΓ«; for Melkor she knew from before the making of the Music and rejected him, and he hated her, and feared her more than all others whom Eru made. ManwΓ« and Varda are seldom parted, and they remain in Valinor. Their halls are above the everlasting snow, upon OiolossΓ«, the uttermost tower of Taniquetil, tallest of all the mountains upon Earth. When ManwΓ« there ascends his throne and looks forth, if Varda is beside him, he sees further than all other eyes, through mist, and through darkness, and over the leagues of the sea. And if ManwΓ« is with her, Varda hears more clearly than all other ears the sound of voices that cry from east to west, from the hills and the valleys, and from the dark places that Melkor has made upon Earth. Of all the Great Ones who dwell in this world the Elves hold Varda most in reverence and love. Elbereth they name her, and they call upon her name out of the shadows of Middle-earth, and uplift it in song at the rising of the stars.
12
+
13
+ Ulmo is the Lord of Waters. He is alone. He dwells nowhere long, but moves as he will in all the deep waters about the Earth or under the Earth. He is next in might to ManwΓ«, and before Valinor was made he was closest to him in friendship; but thereafter he went seldom to the councils of the Valar, unless great matters were in debate. For he kept all Arda in thought, and he has no need of any resting-place. Moreover he does not love to walk upon land, and will seldom clothe himself in a body after the manner of his peers. If the Children of Eru beheld him they were filled with a great dread; for the arising of the King of the Sea was terrible, as a mounting wave that strides to the land, with dark helm foam-crested and raiment of mail shimmering from silver down into shadows of green. The trumpets of ManwΓ« are loud, but Ulmo's voice is deep as the deeps of the ocean which he only has seen.
14
+
15
+ Nonetheless Ulmo loves both Elves and Men, and never abandoned them, not even when they lay under the wrath of the Valar. At times he will come unseen to the shores of Middle-earth, or pass far inland up firths of the sea, and there make music upon his great horns, the Ulumúri, that are wrought of white shell; and those to whom that music comes hear it ever after in their hearts, and longing for the sea never leaves them again. But mostly Ulmo speaks to those who dwell in Middle-earth with voices that are heard only as the music of water. For all seas, lakes, rivers, fountains and springs are in his government; so that the Elves say that the spirit of Ulmo runs in all the veins of the world. Thus news comes to Ulmo, even in the deeps, of all the needs and griefs of Arda, which otherwise would be hidden from Manwë.
16
+
17
+ AulΓ« has might little less than Ulmo. His lordship is over all the substances of which Arda is made. In the beginning he wrought much in fellowship with ManwΓ« and Ulmo; and the fashioning of all lands was his labour. He is a smith and a master of all crafts, and he delights in works of skill, however small, as much as in the mighty building of old. His are the gems that lie deep in the Earth and the gold that is fair in the hand, no less than the walls of the mountains and the basins of the sea. The Noldor learned most of him, and he was ever their friend. Melkor was jealous of him, for AulΓ« was most like himself in thought and in powers; and there was long strife between them, in which Melkor ever marred or undid the works of AulΓ«, and AulΓ« grew weary in repairing the tumults and disorders of Melkor. Both, also, desired to make things of their own that should be new and unthought of by others, and delighted in the praise of their skill. But AulΓ« remained faithful to Eru and submitted all that he did to his will; and he did not envy the works of others, but sought and gave counsel. Whereas Melkor spent his spirit in envy and hate, until at last he could make nothing save in mockery of the thought of others, and all their works he destroyed if he could.
18
+
19
+ The spouse of AulΓ« is Yavanna, the Giver of Fruits. She is the lover of all things that grow in the earth, and all their countless forms she holds in her mind, from the trees like towers in forests long ago to the moss upon stones or the small and secret things in the mould. In reverence Yavanna is next to Varda among the Queens of the Valar. In the form of a woman she is tall, and robed in green; but at times she takes other shapes. Some there are who have seen her standing like a tree under heaven, crowned with the Sun; and from all its branches there spilled a golden dew upon the barren earth, and it grew green with corn; but the roots of the tree were in the waters of Ulmo, and the winds of ManwΓ« spoke in its leaves. KementΓ‘ri, Queen of the Earth, she is surnamed in the Eldarin tongue.
20
+
21
+ The FΓ«anturi, masters of spirits, are brethren, and they are called most often Mandos and LΓ³rien. Yet these are rightly the names of the places of their dwelling, and their true names are NΓ‘mo and Irmo.
22
+
23
+ NΓ‘mo the elder dwells in Mandos, which is westward in Valinor. He is the keeper of the Houses of the Dead, and the summoner of the spirits of the slain. He forgets nothing; and he knows all things that shall be, save only those that lie still in the freedom of IlΓΊvatar. He is the Doomsman of the Valar; but he pronounces his dooms and his Judgements only at the bidding of ManwΓ«. VairΓ« the Weaver is his spouse, who weaves all things that have ever been in Time into her storied webs, and the halls of Mandos that ever widen as the ages pass are clothed with them.
24
+
25
+ Irmo the younger is the master of visions and dreams. In LΓ³rien are his gardens in the land of the Valar, and they are the fairest of all places in the world, filled with many spirits. EstΓ« the gentle, healer of hurts and of weariness, is his spouse. Grey is her raiment; and rest is her gift. She walks not by day, but sleeps upon an island in the tree-shadowed lake of LΓ³rellin. From the fountains of Irmo and EstΓ« all those who dwell in Valinor draw refreshment; and often the Valar come themselves to LΓ³rien and there find repose and easing of the burden of Arda.
26
+
27
+ Mightier than EstΓ« is Nienna, sister of the FΓ«anturi; she dwells alone. She is acquainted with grief, and mourns for every wound that Arda has suffered in the marring of Melkor. So great was her sorrow, as the Music unfolded, that her song turned to lamentation long before its end, and the sound of mourning was woven into the themes of the World before it began. But she does not weep for herself; and those who hearken to her learn pity, and endurance in hope. Her halls are west of West, upon the borders of the world; and she comes seldom to the city of Valimar where all is glad. She goes rather to the halls of Mandos, which are near to her own; and all those who wait in Mandos cry to her, for she brings strength to the spirit and turns sorrow to wisdom. The windows of her house look outward from the walls of the world.
28
+
29
+ Greatest in strength and deeds of prowess is Tulkas, who is surnamed Astaldo, the Valiant. He came last to Arda, to aid the Valar in the first battles with Melkor. He delights in wrestling and in contests of strength; and he rides no steed, for he can outrun all things that go on feet, and he is tireless. His hair and beard are golden, and his flesh ruddy; his weapons are his hands. He has little heed for either the past or the future, and is of no avail as a counsellor, but is a hardy friend. His spouse is Nessa, the sister of OromΓ«, and she also is lithe and fleetfooted. Deer she loves, and they follow her train whenever she goes in the wild; but she can outrun them, swift as an arrow with the wind in her hair. In dancing she delights, and she dances in Valimar on lawns of never-fading green.
30
+
31
+ OromΓ« is a mighty lord. If he is less strong than Tulkas, he is more dreadful in anger; whereas Tulkas laughs ever, in sport or in war, and even in the face of Melkor he laughed in battles before the Elves were born. OromΓ« loved the lands of Middle-earth, and he left them unwillingly and came last to Valinor; and often of old he passed back east over the mountains and returned with his host to the hills and the plains. He is a hunter of monsters and fell beasts, and he delights in horses and in hounds; and all trees he loves, for which reason he is called Aldaron, and by the Sindar Tauron, the Lord of Forests. Nahar is the name of his horse, white in the sun, and shining silver at night. The ValarΓ³ma is the name of his great horn, the sound of which is like the upgoing of the Sun in scarlet, or the sheer lightning cleaving the clouds. Above all the horns of his host it was heard in the woods that Yavanna brought forth in Valinor; for there OromΓ« would train his folk and his beasts for the pursuit of the evil creatures of Melkor. The spouse of OromΓ« is VΓ‘na, the Ever-young; she is the younger sister of Yavanna. All flowers spring as she passes and open if she glances upon them; and all birds sing at her coming.
32
+
33
+ These are the names of the Valar and the Valier, and here is told in brief their likenesses, such as the Eldar beheld them in Aman. But fair and noble as were the forms in which they were manifest to the Children of IlΓΊvatar, they were but a veil upon their beauty and their power. And if little is here said of all that the Eldar once knew, that is as nothing compared with their true being, which goes back into regions and ages far beyond our thought. Among them Nine were of chief power and reverence; but one is removed from their number, and Eight remain, the Aratar, the High Ones of Arda: ManwΓ« and Varda, Ulmo, Yavanna and AulΓ«, Mandos, Nienna, and OromΓ«. Though ManwΓ« is their King and holds their allegiance under Eru, in majesty they are peers, surpassing beyond compare all others, whether of the Valar and the Maiar, or of any other order that IlΓΊvatar has sent into EΓ€.
34
+
35
+ Of the Maiar
36
+ With the Valar came other spirits whose being also began before the World, of the same order as the Valar but of less degree. These are the Maiar, the people of the Valar, and their servants and helpers. Their number is not known to the Elves, and few have names in any of the tongues of the Children of IlΓΊvatar; for though it is otherwise in Aman, in Middle-earth the Maiar have seldom appeared in form visible to Elves and Men.
37
+
38
+ Chief among the Maiar of Valinor whose names are remembered in the histories of the Elder Days are IlmarΓ«, the handmaid of Varda, and EΓΆnwΓ«, the banner-bearer and herald of ManwΓ«, whose might in arms is surpassed by none in Arda. But of all the Maiar OssΓ« and Uinen are best known to the Children of IlΓΊvatar.
39
+
40
+ OssΓ« is a vassal of Ulmo, and he is master of the seas that wash the shores of Middle-earth. He does not go in the deeps, but loves the coasts and the isles, and rejoices in the winds of ManwΓ«; for in storm he delights, and laughs amid the roaring of the waves. His spouse is Uinen, the Lady of the Seas, whose hair lies spread through all waters under sky. All creatures she loves that live in the salt streams, and all weeds that grow there; to her mariners cry, for she can lay calm upon the waves, restraining the wildness of OssΓ«. The NΓΊmenΓ³reans lived long in her protection, and held her in reverence equal to the Valar.
41
+
42
+ Melkor hated the Sea, for he could not subdue it. It is said that in the making of Arda he endeavoured to draw OssΓ« to his allegiance, promising to him all the realm and power of Ulmo, if he would serve him. So it was that long ago there arose great tumults in the sea that wrought ruin to the lands. But Uinen, at the prayer of AulΓ«, restrained OssΓ« and brought him before Ulmo; and he was pardoned and returned to his allegiance, to which he has remained faithful. For the most part; for the delight in violence has never wholly departed from him, and at times he will rage in his wilfulness without any command from Ulmo his lord. Therefore those who dwell by the sea or go up in ships may love him, but they do not trust him.
43
+
44
+ Melian was the name of a Maia who served both VΓ‘na and EstΓ«; she dwelt long in LΓ³rien, tending the trees that flower in the gardens of Irmo, ere she came to Middle-earth. Nightingales sang about her wherever she went.
45
+
46
+ Wisest of the Maiar was OlΓ³rin. He too dwelt in LΓ³rien, but his ways took him often to the house of Nienna, and of her he learned pity and patience.
47
+
48
+ Of Melian much is told in the Quenta Silmarillion. But of OlΓ³rin that tale does not speak; for though he loved the Elves, he walked among them unseen, or in form as one of them, and they did not know whence came the fair visions or the promptings of wisdom that he put into their hearts. In later days he was the friend of all the Children of IlΓΊvatar, and took pity on their sorrows; and those who listened to him awoke from despair and put away the imaginations of darkness.
49
+
50
+ Of the Enemies
51
+ Last of all is set the name of Melkor, He who arises in Might. But that name he has forfeited; and the Noldor, who among the Elves suffered most from his malice, will not utter it, and they name him Morgoth, the Dark Enemy of the World. Great might was given to him by IlΓΊvatar, and he was coeval with ManwΓ«. In the powers and knowledge of all the other Valar he had part, but he turned them to evil purposes, and squandered his strength in violence and tyranny. For he coveted Arda and all that was in it, desiring the kingship of ManwΓ« and dominion over the realms of his peers.
52
+
53
+ From splendour he fell through arrogance to contempt for all things save himself, a spirit wasteful and pitiless. Understanding he turned to subtlety in perverting to his own will all that he would use, until he became a liar without shame. He began with the desire of Light, but when he could not possess it for himself alone, he descended through fire and wrath into a great burning, down into Darkness. And darkness he used most in his evil works upon Arda, and filled it with fear for all living things.
54
+
55
+ Yet so great was the power of his uprising that in ages forgotten he contended with ManwΓ« and all the Valar, and through long years in Arda held dominion over most of the lands of the Earth. But he was not alone. For of the Maiar many were drawn to his splendour in the days of his greatness, and remained in that allegiance down into his darkness; and others he corrupted afterwards to his service with lies and treacherous gifts. Dreadful among these spirits were the Valaraukar, the scourges of fire that in Middle-earth were called the Balrogs, demons of terror.
56
+
57
+ Among those of his servants that have names the greatest was that spirit whom the Eldar called Sauron, or Gorthaur the Cruel. In his beginning he was of the Maiar of AulΓ«, and he remained mighty in the lore of that people. In all the deeds of Melkor the Morgoth upon Arda, in his vast works and in the deceits of his cunning, Sauron had a part, and was only less evil than his master in that for long he served another and not himself. But in after years he rose like a shadow of Morgoth and a ghost of his malice, and walked behind him on the same ruinous path down into the Void.
58
+
59
+ HERE ENDS THE VALAQUENTA
60
+
61
+
pdf2text.py ADDED
@@ -0,0 +1,403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+
4
+ pdf2text.py - OCR helpers (built on docTR) to convert pdf to images to text
5
+ """
6
+
7
+ import logging
8
+ from pathlib import Path
9
+
10
+ logging.basicConfig(
11
+ level=logging.INFO,
12
+ format="%(asctime)s %(levelname)s %(message)s",
13
+ datefmt="%m/%d/%Y %I:%M:%S",
14
+ )
15
+
16
+
17
+ import os
18
+ import pprint as pp
19
+ import re
20
+ import shutil
21
+ import time
22
+ from datetime import date, datetime
23
+ from os.path import basename, dirname, join
24
+ from pathlib import Path
25
+
26
+ from cleantext import clean
27
+ from doctr.io import DocumentFile
28
+ from doctr.models import ocr_predictor
29
+ from libretranslatepy import LibreTranslateAPI
30
+ from natsort import natsorted
31
+ from spellchecker import SpellChecker
32
+ from tqdm.auto import tqdm
33
+
34
+
35
def simple_rename(filepath, target_ext=".txt"):
    """Build the OCR output filename for ``filepath``.

    Args:
        filepath: path (str or Path) of the source document; only its stem
            (final component without the last suffix) is used.
        target_ext (str, optional): text appended after a trailing underscore.
            Defaults to ".txt".

    Returns:
        str: a name of the form ``OCR_<stem>_<target_ext>``; any directory
        part of ``filepath`` is dropped.
    """
    stem = Path(filepath).stem
    return "OCR_{}_{}".format(stem, target_ext)
39
+
40
+
41
def rm_local_text_files(name_contains="RESULT_"):
    """
    rm_local_text_files - remove matching .txt files from the current working directory

    Only regular files directly inside ``Path.cwd()`` are considered; the
    directory is not searched recursively.

    Args:
        name_contains (str, optional): substring a filename must contain for the
            file to be deleted. Defaults to "RESULT_".
    """
    files = [
        f
        for f in Path.cwd().iterdir()
        if f.is_file() and f.suffix == ".txt" and name_contains in f.name
    ]
    logging.info(f"removing {len(files)} text files")
    for f in files:
        f.unlink()
    logging.info("done")
57
+
58
+
59
def corr(
    s: str,
    add_space_when_numerics=False,
    exceptions=("e.g.", "i.e.", "etc.", "cf.", "vs.", "p."),
) -> str:
    """corrects spacing in a string

    Args:
        s (str): the string to correct
        add_space_when_numerics (bool, optional): [add a space when a period is between two numbers, example 5.73]. Defaults to False.
        exceptions (iterable of str, optional): [substrings to restore after the spacing fixes: each entry's
            whitespace-free form is replaced by the entry itself]. Defaults to ('e.g.', 'i.e.', 'etc.', 'cf.', 'vs.', 'p.').
            The default is an immutable tuple so it cannot be altered between calls.

    Returns:
        str: the corrected string
    """
    if add_space_when_numerics:
        s = re.sub(r"(\d)\.(\d)", r"\1. \2", s)

    # collapse every whitespace run (including newlines/tabs) to a single space
    s = re.sub(r"\s+", " ", s)
    # drop the space before sentence-ending punctuation or a double quote
    s = re.sub(r'\s([?.!"](?:\s|$))', r"\1", s)

    # fix space before apostrophe
    s = re.sub(r"\s\'", r"'", s)
    # fix space after apostrophe
    s = re.sub(r"'\s", r"'", s)
    # fix space before comma
    s = re.sub(r"\s,", r",", s)

    for e in exceptions:
        # the whitespace-free spelling of the exception is what gets replaced
        expected_sub = re.sub(r"\s", "", e)
        s = s.replace(expected_sub, e)

    return s
92
+
93
+
94
def fix_punct_spaces(string):
    """
    fix_punct_spaces - replace spaces around punctuation with punctuation. For example, "hello , there" -> "hello, there"

    Each run of ``?``, ``!``, ``.`` and ``,`` (optionally separated by
    whitespace) is squeezed together and followed by exactly one space, so a
    period inside a token is affected too ("3.14" -> "3. 14").

    Parameters
    ----------
    string : str, required, input string to be corrected

    Returns
    -------
    str, corrected string
    """

    fix_spaces = re.compile(r"\s*([?!.,]+(?:\s+[?!.,]+)*)\s*")
    # the pattern allows any whitespace between marks, so strip any whitespace
    # (not just " ") from the matched run before re-appending a single space
    string = fix_spaces.sub(
        lambda match: re.sub(r"\s+", "", match.group(1)) + " ", string
    )
    string = string.replace(" ' ", "'")
    string = string.replace(' " ', '"')
    return string.strip()
112
+
113
+
114
def clean_OCR(ugly_text: str):
    """
    clean_OCR - clean the OCR text files.

    Newlines and tabs become spaces, runs of spaces are collapsed, leading
    whitespace is dropped, dash fragments are removed and the result is passed
    through fix_punct_spaces.

    Parameters
    ----------
    ugly_text : str, required, input string to be cleaned

    Returns
    -------
    str, cleaned string
    """
    # Turn newlines and tabs into spaces.
    cleaned_text = ugly_text.replace("\n", " ").replace("\t", " ")
    # Collapse every run of spaces to one; a single "double space -> space"
    # replace pass would leave leftovers for runs of three or more.
    cleaned_text = re.sub(r" {2,}", " ", cleaned_text)
    # Remove all the spaces at the beginning of the text.
    cleaned_text = cleaned_text.lstrip()
    # remove all instances of "- " and " -" (e.g. hyphenation at line breaks)
    cleaned_text = cleaned_text.replace("- ", "")
    cleaned_text = cleaned_text.replace(" -", "")
    return fix_punct_spaces(cleaned_text)
138
+
139
+
140
def move2completed(from_dir, filename, new_folder="completed", verbose=False):
    """
    move2completed - move a file into a sub-folder of its current directory

    The sub-folder is created if it does not exist. A failed move is logged
    (with traceback) rather than raised, so batch processing can continue.

    Args:
        from_dir (str): directory that currently contains the file
        filename (str): name of the file inside ``from_dir``
        new_folder (str, optional): name of the destination sub-folder. Defaults to "completed".
        verbose (bool, optional): print a message when the sub-folder is created. Defaults to False.
    """
    old_filepath = join(from_dir, filename)

    new_filedirectory = join(from_dir, new_folder)

    if not os.path.isdir(new_filedirectory):
        os.mkdir(new_filedirectory)
        if verbose:
            print("created new directory for files at: \n", new_filedirectory)
    new_filepath = join(new_filedirectory, filename)

    try:
        shutil.move(old_filepath, new_filepath)
    except OSError:
        # shutil.Error is an OSError subclass, so this covers every failure
        # shutil.move reports; logging.exception records the cause.
        logging.exception(
            "ERROR! unable to move file to \n{}. Please investigate".format(
                new_filepath
            )
        )
    else:
        logging.info(
            "successfully moved the file {} to */{}.".format(filename, new_folder)
        )
162
+
163
+
164
+ """## pdf2text functions
165
+
166
+ """
167
+
168
+
169
# literal substring replacements that postprocess() applies after its first
# correction pass (keys are searched as plain text, not as regexes)
custom_replace_list = {
    "t0": "to",
    "'$": "'s",
    ",,": ", ",
    "_ ": " ",
    " '": "'",
}

# literal substring replacements that postprocess() applies after its second
# correction pass, in the insertion order shown here
replace_corr_exceptions = {
    "i. e.": "i.e.",
    "e. g.": "e.g.",
    "e. g": "e.g.",
    " ,": ",",
}


# module-level spell checker instance shared by check_word_spelling()
spell = SpellChecker()
186
+
187
+
188
def check_word_spelling(word: str) -> bool:
    """
    check_word_spelling - ask the module-level spell checker whether a word is known

    Args:
        word (str): the single word to look up

    Returns:
        bool: True when the spell checker reports no unknown words for the input, False otherwise
    """
    unknown_words = spell.unknown([word])
    return not unknown_words
202
+
203
+
204
def eval_and_replace(text: str, match_token: str = "- ") -> str:
    """
    eval_and_replace - conditionally replace all instances of a substring in a string based on whether the eliminated substring results in a valid word

    For each occurrence of match_token (left to right) the alphabetic characters
    of the word before it and the word after it are glued together. If
    check_word_spelling accepts the glued word the token is removed, otherwise
    it is replaced by a single space. When there is no word on one side of the
    token (the text starts or ends with it) the token is replaced by a space
    without a spell check.

    Args:
        text (str): text to evaluate
        match_token (str, optional): token to replace. Defaults to "- ".

    Returns:
        str: text with replaced tokens
    """
    while match_token in text:
        full_before_text, _, full_after_text = text.partition(match_token)
        before_words = full_before_text.split()
        after_words = full_after_text.split()

        if not before_words or not after_words:
            # nothing to join on one side; indexing the empty word list here
            # used to raise IndexError
            text = full_before_text + " " + full_after_text
            continue

        before_text = "".join(ch for ch in before_words[-1] if ch.isalpha())
        after_text = "".join(ch for ch in after_words[0] if ch.isalpha())

        if check_word_spelling(before_text + after_text):
            text = full_before_text + full_after_text
        else:
            text = full_before_text + " " + full_after_text
    return text
236
+
237
+
238
def cleantxt_ocr(ugly_text, lower=False, lang: str = "en") -> str:
    """
    cleantxt_ocr - wrapper around clean-text's ``clean`` with non-default options for OCR output

    See https://pypi.org/project/clean-text/ for the meaning of each option.

    Args:
        ugly_text (str): text to clean
        lower (bool, optional): lowercase the text. Defaults to False.
        lang (str, optional): language passed to ``clean`` (set to 'de' for German special handling). Defaults to "en".

    Returns:
        str: cleaned text
    """
    clean_options = {
        "fix_unicode": True,  # fix various unicode errors
        "to_ascii": True,  # transliterate to closest ASCII representation
        "lower": lower,  # lowercase text
        "no_line_breaks": True,  # fully strip line breaks instead of normalizing them
        "no_urls": True,  # replace all URLs with a special token
        "no_emails": True,  # replace all email addresses with a special token
        "no_phone_numbers": False,  # keep phone numbers
        "no_numbers": False,  # keep numbers
        "no_digits": False,  # keep digits
        "no_currency_symbols": False,  # keep currency symbols
        "no_punct": False,  # keep punctuation
        "replace_with_punct": "",
        "replace_with_url": "<URL>",
        "replace_with_email": "<EMAIL>",
        "replace_with_phone_number": "<PHONE>",
        "replace_with_number": "<NUM>",
        "replace_with_digit": "0",
        "replace_with_currency_symbol": "<CUR>",
        "lang": lang,  # set to 'de' for German special handling
    }
    return clean(ugly_text, **clean_options)
277
+
278
+
279
def format_ocr_out(OCR_data):
    """
    format_ocr_out - turn raw OCR output into cleaned, corrected text

    Args:
        OCR_data (list or any): a list of strings (joined with single spaces) or any other object (converted with str())

    Returns:
        the result of corr() applied to the cleantxt_ocr() output
    """
    raw_text = " ".join(OCR_data) if isinstance(OCR_data, list) else str(OCR_data)
    return corr(cleantxt_ocr(raw_text))
287
+
288
+
289
def postprocess(text: str) -> str:
    """
    postprocess - final clean-up, to be used after recombining the lines

    Pipeline: cleantxt_ocr -> corr -> custom_replace_list substitutions -> corr
    -> replace_corr_exceptions substitutions -> eval_and_replace.

    Args:
        text (str): recombined text

    Returns:
        str: the processed text
    """

    def _substitute_all(source, table):
        # apply every key -> value replacement of the table, in table order
        for old, new in table.items():
            source = source.replace(str(old), str(new))
        return source

    cleaned = corr(cleantxt_ocr(text))
    cleaned = corr(_substitute_all(cleaned, custom_replace_list))
    cleaned = _substitute_all(cleaned, replace_corr_exceptions)
    return eval_and_replace(cleaned)
303
+
304
+
305
def result2text(result, as_text=False) -> "str | list[str]":
    """
    Convert OCR result to text

    Walks result.pages -> page.blocks -> block.lines -> line.words. Every block
    starts with "\\n\\t" and every word value is followed by one space.

    Args:
        result: OCR result object exposing the pages/blocks/lines/words hierarchy
        as_text (bool, optional): join the per-page strings with newlines. Defaults to False.

    Returns:
        str or list of str: one string for the whole document when as_text is True, otherwise one string per page
    """
    full_doc = []
    for page in result.pages:
        block_texts = []
        for block in page.blocks:
            words = "".join(
                word.value + " " for line in block.lines for word in line.words
            )
            block_texts.append("\n\t" + words)
        # join once per page instead of growing a string word by word
        full_doc.append("".join(block_texts))

    return "\n".join(full_doc) if as_text else full_doc
320
+
321
+
322
def convert_PDF_to_Text(
    PDF_file,
    ocr_model=None,
    max_pages: int = 20,
):
    """
    convert_PDF_to_Text - run OCR on a PDF and return the cleaned text with metadata

    Args:
        PDF_file (str or Path): path of the PDF to read
        ocr_model (optional): callable applied to the loaded document; when None, ``ocr_predictor(pretrained=True)`` is created. Defaults to None.
        max_pages (int, optional): documents with more pages are cut down to this many. Defaults to 20.

    Returns:
        dict: keys "num_pages" (pages actually processed), "runtime" (seconds, 2 decimals),
        "date" (today as string), "converted_text", "truncated" (bool) and "length"
        (character count of converted_text)
    """
    started_at = time.perf_counter()
    PDF_file = Path(PDF_file)
    if ocr_model is None:
        ocr_model = ocr_predictor(pretrained=True)
    logging.info(f"starting OCR on {PDF_file.name}")
    doc = DocumentFile.from_pdf(PDF_file)

    truncated = len(doc) > max_pages
    if truncated:
        logging.warning(
            f"PDF has {len(doc)} pages, which is more than {max_pages}.. truncating"
        )
        doc = doc[:max_pages]

    # OCR, then two clean-up passes over the per-page text
    logging.info(f"running OCR on {len(doc)} pages")
    page_texts = result2text(ocr_model(doc))
    formatted_pages = [format_ocr_out(page) for page in page_texts]
    final_pages = [postprocess(page) for page in formatted_pages]
    ocr_results = "\n\n".join(final_pages)

    elapsed = time.perf_counter() - started_at
    logging.info("OCR complete")

    return {
        "num_pages": len(doc),
        "runtime": round(elapsed, 2),
        "date": str(date.today()),
        "converted_text": ocr_results,
        "truncated": truncated,
        "length": len(ocr_results),
    }
364
+
365
+
366
# @title translation functions

# module-level LibreTranslate client; translate_text() sends its requests through it
lt = LibreTranslateAPI("https://translate.astian.org/")
369
+
370
+
371
def translate_text(text, source_l, target_l="en"):
    """
    translate_text - translate a string with the module-level LibreTranslate client

    Args:
        text (str): text to translate
        source_l (str): language code of the input
        target_l (str, optional): language code of the output. Defaults to "en".

    Returns:
        str: the client's answer converted with str()
    """
    translated = lt.translate(text, source_l, target_l)
    return str(translated)
374
+
375
+
376
def translate_doc(filepath, lang_start, lang_end="en", verbose=False):
    """translate a document from lang_start to lang_end

    The file is translated line by line and written to the folder
    ``translated_<lang_end>`` next to the source file, under the name
    ``[To <lang_end>]`` + simple_rename(<input name>) + ``.txt``.

    example language codes:
    {'code': 'en', 'name': 'English'},
    {'code': 'fr', 'name': 'French'},
    {'code': 'de', 'name': 'German'},
    {'code': 'it', 'name': 'Italian'},

    Args:
        filepath (str): path of the text file to translate
        lang_start (str): language code of the source document
        lang_end (str, optional): language code to translate to. Defaults to "en".
        verbose (bool, optional): print a completion message. Defaults to False.

    Returns:
        str: path of the translated file
    """

    src_folder = Path(dirname(filepath))
    trgt_folder = src_folder / f"translated_{lang_end}"
    trgt_folder.mkdir(exist_ok=True)
    with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
        foreign_t = f.readlines()
    in_name = basename(filepath)
    translated_doc = []
    for line in tqdm(
        foreign_t, total=len(foreign_t), desc="translating {}...".format(in_name[:10])
    ):
        if not line.strip():
            # blank line: nothing to translate, so skip the remote call
            translated_doc.append(line)
            continue
        translated_line = translate_text(line, lang_start, lang_end)
        # writelines() adds no separators; restore the line break if the
        # translation came back without it
        if line.endswith("\n") and not translated_line.endswith("\n"):
            translated_line += "\n"
        translated_doc.append(translated_line)
    t_out_name = "[To {}]".format(lang_end) + simple_rename(in_name) + ".txt"
    out_path = join(trgt_folder, t_out_name)
    with open(out_path, "w", encoding="utf-8", errors="ignore") as f_o:
        f_o.writelines(translated_doc)
    if verbose:
        print("finished translating the document! - ", datetime.now())
    return out_path
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ clean-text[gpl]
2
+ python-doctr[torch]
3
+ gradio
4
+ libretranslatepy
5
+ natsort
6
+ nltk
7
+ pyspellchecker
8
+ torch
9
+ tqdm
10
+ transformers
11
+ accelerate
summarize.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ import torch
4
+ from tqdm.auto import tqdm
5
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
6
+
7
+
8
def load_model_and_tokenizer(model_name):
    """
    load_model_and_tokenizer - fetch a seq2seq model and its tokenizer by name

    The model is moved to "cuda" when torch reports CUDA as available.

    Args:
        model_name (str): the name of the model to load
    Returns:
        AutoModelForSeq2SeqLM: the model
        AutoTokenizer: the tokenizer
    """

    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if torch.cuda.is_available():
        model = model.to("cuda")

    logging.info(f"Loaded model {model_name}")
    return model, tokenizer
29
+
30
+
31
def summarize_and_score(ids, mask, model, tokenizer, **kwargs):
    """
    summarize_and_score - generate a summary for one batch of token ids and report its score

    Args:
        ids (): the token ids of the batch; a leading dimension is added before generation
        mask (): the attention mask for the batch; a leading dimension is added as well
        model (): the model to use for summarization
        tokenizer (): the tokenizer used to decode the generated ids
        **kwargs: forwarded unchanged to model.generate

    Returns:
        tuple: (summary, score) - summary is the output of tokenizer.batch_decode
        for the generated sequences, score is the first entry of
        sequences_scores rounded to 4 decimals
    """
    use_cuda = torch.cuda.is_available()

    input_ids = ids[None, :]
    attention_mask = mask[None, :]
    if use_cuda:
        input_ids = input_ids.to("cuda")
        attention_mask = attention_mask.to("cuda")

    # global attention only on the first position of the sequence
    global_attention_mask = torch.zeros_like(attention_mask)
    global_attention_mask[:, 0] = 1

    generation = model.generate(
        input_ids,
        attention_mask=attention_mask,
        global_attention_mask=global_attention_mask,
        output_scores=True,
        return_dict_in_generate=True,
        **kwargs,
    )
    summary = tokenizer.batch_decode(
        generation.sequences,
        skip_special_tokens=True,
        remove_invalid_values=True,
    )
    first_score = generation.sequences_scores.cpu().numpy()[0]
    score = round(first_score, 4)

    return summary, score
71
+
72
+
73
def summarize_via_tokenbatches(
    input_text: str,
    model,
    tokenizer,
    batch_length=2048,
    batch_stride=16,
    **kwargs,
):
    """
    summarize_via_tokenbatches - split a text into token batches and summarize each batch

    Args:
        input_text (str): the text to summarize
        model (): the model to use for summarization
        tokenizer (): the tokenizer to use for summarization
        batch_length (int, optional): the length of each batch; values below 512 are raised to 512. Defaults to 2048.
        batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
        **kwargs: forwarded unchanged to summarize_and_score

    Returns:
        list of dict: one entry per batch with the keys "input_tokens",
        "summary" and "summary_score"
    """
    if batch_length < 512:
        batch_length = 512
        print("WARNING: batch_length was set to 512")
    # report all input parameters
    print(
        f"input parameters: {kwargs}, batch_length={batch_length}, batch_stride={batch_stride}"
    )
    encoded_input = tokenizer(
        input_text,
        padding="max_length",
        truncation=True,
        max_length=batch_length,
        stride=batch_stride,
        return_overflowing_tokens=True,
        add_special_tokens=False,
        return_tensors="pt",
    )

    in_id_arr, att_arr = encoded_input.input_ids, encoded_input.attention_mask
    gen_summaries = []

    # the context manager closes the progress bar even when a batch raises
    with tqdm(total=len(in_id_arr)) as pbar:
        for _id, _mask in zip(in_id_arr, att_arr):
            result, score = summarize_and_score(
                ids=_id,
                mask=_mask,
                model=model,
                tokenizer=tokenizer,
                **kwargs,
            )
            # plain float so the value is a builtin type for later consumers
            score = round(float(score), 4)
            gen_summaries.append(
                {
                    "input_tokens": _id,
                    "summary": result,
                    "summary_score": score,
                }
            )
            print(f"\t{result[0]}\nScore:\t{score}")
            pbar.update()

    return gen_summaries
utils.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ utils.py - Utility functions for the project.
3
+ """
4
+
5
+ import re
6
+ from pathlib import Path
7
+
8
+ from natsort import natsorted
9
+
10
+
11
def truncate_word_count(text, max_words=512):
    """
    truncate_word_count - a helper function for the gradio module
    Parameters
    ----------
    text : str, required, the text to be processed
    max_words : int, optional, the maximum number of words, default=512
    Returns
    -------
    dict, the text and whether it was truncated: "was_truncated" (bool) and
    "truncated_text" (the first max_words words joined by single spaces when
    truncated, otherwise the unchanged input)
    """
    # str.split() without arguments splits on runs of whitespace and ignores
    # leading/trailing whitespace, so no empty "words" are counted
    words = text.split()
    if len(words) > max_words:
        return {
            "was_truncated": True,
            "truncated_text": " ".join(words[:max_words]),
        }
    return {"was_truncated": False, "truncated_text": text}
32
+
33
+
34
def load_examples(src):
    """
    load_examples - a helper function for the gradio module to load examples

    Reads every ``*.txt`` file in src (natural sort order) and pairs its text
    with a fixed set of example settings.

    Args:
        src (str or Path): folder with the example files; created if missing

    Returns:
        list of list, one row per file: [text, "large", 2, 512, 0.7, 3.5, 3]
    """
    src = Path(src)
    src.mkdir(parents=True, exist_ok=True)
    examples = natsorted(src.glob("*.txt"))
    # NOTE(review): the constants look like they mirror the inputs of the
    # gradio submit function (model size, beams, batch length, ...) - confirm
    text_examples = []
    for example in examples:
        # explicit encoding so the result does not depend on the platform locale
        with open(example, "r", encoding="utf-8") as f:
            text = f.read()
        text_examples.append([text, "large", 2, 512, 0.7, 3.5, 3])

    return text_examples
52
+
53
+
54
def load_example_filenames(example_path: "str | Path"):
    """
    load_example_filenames - a helper function for the gradio module to load examples

    Args:
        example_path (str or Path): folder that is searched for ``*.txt`` files

    Returns:
        dict, the examples (filename:full path), ordered by path
    """
    example_path = Path(example_path)
    # glob() yields files in arbitrary order; sort so the mapping is deterministic
    examples = {f.name: f for f in sorted(example_path.glob("*.txt"))}
    return examples