KAI MAURIN-JONES committed on
Commit
fa6876f
1 Parent(s): e410fea

file cleaning

Files changed (2)
  1. data/wordle_humans.csv +0 -40
  2. wordle_testing.ipynb +0 -1654
data/wordle_humans.csv DELETED
@@ -1,40 +0,0 @@
1
- player,target,first_guess,second_guess,third_guess,fourth_guess,fifth_guess,sixth_guess
2
- human,vague,arose,plate,cache,mauve,vague,none
3
- human,apple,douce,lairy,slave,algae,apple,none
4
- human,ninth,douce,lairy,gimps,ninth,none,none
5
- human,flail,douce,lairy,snail,flail,none,none
6
- human,stage,douce,lairy,phase,stage,none,none
7
- human,heady,douce,lairy,ready,beady,heady,none
8
- human,none,douce,dairy,dunes,debug,none,none
9
- human,usage,douce,lairy,aunts,swamp,usage,none
10
- human,sound,douce,pound,found,bound,mound,sound
11
- human,salsa,douce,lairy,palms,salts,ghost,salsa
12
- human,magic,douce,lairy,cabin,manic,magic,none
13
- human,none,douce,lairy,cache,none,none,none
14
- human,avail,douce,lairy,snail,flail,avail,none
15
- human,kiosk,douce,lairy,lions,moist,frosh,kiosk
16
- human,sweat,douce,lairy,wheat,sweat,none,none
17
- human,ruddy,douce,lairy,ruddy,none,none,none
18
- human,riper,douce,lairy,tries,miner,viper,wiper
19
- human,vague,douce,lairy,mauve,vague,none,none
20
- human,syrup,arose,unity,rubby,syrup,none,none
21
- human,above,adore,above,none,none,none,none
22
- human,moose,arose,those,close,unity,moose,none
23
- human,polka,arose,unity,polar,polka,none,none
24
- human,syrup,audio,plume,usurp,syrup,none,none
25
- human,worse,audio,store,horse,worse,none,none
26
- human,worse,audio,stone,posse,worse,none,none
27
- human,polka,audio,polar,polka,none,none,none
28
- human,polka,audio,groan,poach,polka,none,none
29
- human,moose,audio,store,chose,loose,goose,moose
30
- human,moose,audio,store,close,moose,none,none
31
- human,squat,homes,start,squat,none,none,none
32
- human,above,audio,alone,above,none,none,none
33
- human,squat,audio,usual,squat,none,none,none
34
- human,squat,audio,taupe,squat,none,none,none
35
- human,Trend,Audio,Nerdy,Trend,none,none,none
36
- human,Trend,Audio,Depth,Trend,none,none,none
37
- human,Trend,Later,Rents,Trend,none,none,none
38
- human,toxic,Later,throw,toxic,none,none,none
39
- human,toxic,AUDIO,CHOIR,toxic,none,none,none
40
- human,toxic,audio,movie,colic,robin,toxic,none
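For reference, a minimal sketch of loading and sanity-checking this table after the fact, assuming pandas; the 'none' entries mark unused guess slots, and a 'none' target appears to mark an unsolved game:

    import pandas as pd

    df = pd.read_csv("data/wordle_humans.csv")
    guess_cols = ["first_guess", "second_guess", "third_guess",
                  "fourth_guess", "fifth_guess", "sixth_guess"]

    # normalize the mixed-case rows (e.g. 'Trend', 'Audio') and count used guess slots
    df[guess_cols] = df[guess_cols].apply(lambda col: col.str.lower())
    df["num_guesses"] = (df[guess_cols] != "none").sum(axis=1)
    print(df["num_guesses"].mean())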
wordle_testing.ipynb DELETED
@@ -1,1654 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "attachments": {},
5
- "cell_type": "markdown",
6
- "metadata": {},
7
- "source": [
8
- "## Imports"
9
- ]
10
- },
11
- {
12
- "cell_type": "code",
13
- "execution_count": 1,
14
- "metadata": {},
15
- "outputs": [],
16
- "source": [
17
- "import numpy as np\n",
18
- "import random\n",
19
- "import operator\n",
20
- "import time\n",
21
- "import pandas as pd\n",
22
- "from wordle_functions import *"
23
- ]
24
- },
25
- {
26
- "cell_type": "markdown",
27
- "metadata": {},
28
- "source": [
29
- "## Importing datasets"
30
- ]
31
- },
32
- {
33
- "attachments": {},
34
- "cell_type": "markdown",
35
- "metadata": {},
36
- "source": [
37
- "### official words\n",
38
- "- official wordle word list"
39
- ]
40
- },
41
- {
42
- "cell_type": "code",
43
- "execution_count": 2,
44
- "metadata": {},
45
- "outputs": [
46
- {
47
- "name": "stdout",
48
- "output_type": "stream",
49
- "text": [
50
- "2310\n"
51
- ]
52
- },
53
- {
54
- "data": {
55
- "text/plain": [
56
- "['wince', 'thyme', 'mower', 'horde', 'heard']"
57
- ]
58
- },
59
- "execution_count": 2,
60
- "metadata": {},
61
- "output_type": "execute_result"
62
- }
63
- ],
64
- "source": [
65
- "### Official list\n",
66
- "official_words = []\n",
67
- "\n",
68
- "with open(\"data/official_words_processed.txt\", \"r\", encoding = \"utf-8\") as f:\n",
69
- " for word in f.read().split(\"\\n\"):\n",
70
- " official_words.append(word)\n",
71
- "\n",
72
- "f.close() # closes connection to file\n",
73
- "\n",
74
- "print(len(official_words))\n",
75
- "official_words[:5]"
76
- ]
77
- },
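As an aside, the same read can be done in one defensive pass — a sketch using only the standard library; the empty-string filter anticipates the blank entry noted in a later cell:

    from pathlib import Path

    # splitlines() avoids a trailing empty string; the filter drops blank entries
    official_words = [w for w in Path("data/official_words_processed.txt")
                          .read_text(encoding="utf-8").splitlines() if w]
    print(len(official_words))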
78
- {
79
- "attachments": {},
80
- "cell_type": "markdown",
81
- "metadata": {},
82
- "source": [
83
- "### alternative list 1\n",
84
- "- an alternate list of 5-letter words found on the web"
85
- ]
86
- },
87
- {
88
- "cell_type": "code",
89
- "execution_count": 3,
90
- "metadata": {},
91
- "outputs": [
92
- {
93
- "ename": "FileNotFoundError",
94
- "evalue": "[Errno 2] No such file or directory: 'data/alt_words_1.txt'",
95
- "output_type": "error",
96
- "traceback": [
97
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
98
- "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
99
- "\u001b[1;32m/Users/kmaurinjones/Desktop/data_science/data_science_projects/wordle_wizard/wordle_testing.ipynb Cell 7\u001b[0m in \u001b[0;36m<cell line: 4>\u001b[0;34m()\u001b[0m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/kmaurinjones/Desktop/data_science/data_science_projects/wordle_wizard/wordle_testing.ipynb#W6sZmlsZQ%3D%3D?line=0'>1</a>\u001b[0m \u001b[39m### Official list\u001b[39;00m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/kmaurinjones/Desktop/data_science/data_science_projects/wordle_wizard/wordle_testing.ipynb#W6sZmlsZQ%3D%3D?line=1'>2</a>\u001b[0m alt_words_1 \u001b[39m=\u001b[39m []\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/kmaurinjones/Desktop/data_science/data_science_projects/wordle_wizard/wordle_testing.ipynb#W6sZmlsZQ%3D%3D?line=3'>4</a>\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(\u001b[39m\"\u001b[39;49m\u001b[39mdata/alt_words_1.txt\u001b[39;49m\u001b[39m\"\u001b[39;49m, \u001b[39m\"\u001b[39;49m\u001b[39mr\u001b[39;49m\u001b[39m\"\u001b[39;49m, encoding \u001b[39m=\u001b[39;49m \u001b[39m\"\u001b[39;49m\u001b[39mutf-8\u001b[39;49m\u001b[39m\"\u001b[39;49m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/kmaurinjones/Desktop/data_science/data_science_projects/wordle_wizard/wordle_testing.ipynb#W6sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m \u001b[39mfor\u001b[39;00m word \u001b[39min\u001b[39;00m f\u001b[39m.\u001b[39mread()\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m):\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/kmaurinjones/Desktop/data_science/data_science_projects/wordle_wizard/wordle_testing.ipynb#W6sZmlsZQ%3D%3D?line=5'>6</a>\u001b[0m alt_words_1\u001b[39m.\u001b[39mappend(word)\n",
100
- "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'data/alt_words_1.txt'"
101
- ]
102
- }
103
- ],
104
- "source": [
105
- "### Alternative list 1\n",
106
- "alt_words_1 = []\n",
107
- "\n",
108
- "with open(\"data/alt_words_1.txt\", \"r\", encoding = \"utf-8\") as f:\n",
109
- " for word in f.read().split(\"\\n\"):\n",
110
- " alt_words_1.append(word)\n",
111
- "\n",
112
- "f.close() # closes connection to file\n",
113
- "\n",
114
- "print(len(alt_words_1))\n",
115
- "alt_words_1[:5]"
116
- ]
117
- },
118
- {
119
- "attachments": {},
120
- "cell_type": "markdown",
121
- "metadata": {},
122
- "source": [
123
- "### nltk grand corpus\n",
124
- "- Amalgamation of all words in various NLTK corpora to have as big a dataset as possible\n",
125
- "- Developed manually"
126
- ]
127
- },
128
- {
129
- "cell_type": "code",
130
- "execution_count": null,
131
- "metadata": {},
132
- "outputs": [],
133
- "source": [
134
- "### grand corpus tokens\n",
135
- "nltk_tokens = []\n",
136
- "\n",
137
- "with open(\"data/nltk_grand_corpus_tokens_5.txt\", \"r\", encoding = \"utf-8\") as f:\n",
138
- " for word in f.read().split(\"\\n\"):\n",
139
- " nltk_tokens.append(word)\n",
140
- "\n",
141
- "f.close() # closes connection to file\n",
142
- "\n",
143
- "print(len(nltk_tokens))\n",
144
- "nltk_tokens[:5]"
145
- ]
146
- },
147
- {
148
- "cell_type": "markdown",
149
- "metadata": {},
150
- "source": [
151
- "### nltk grand corpus types and counts"
152
- ]
153
- },
154
- {
155
- "cell_type": "code",
156
- "execution_count": null,
157
- "metadata": {},
158
- "outputs": [],
159
- "source": [
160
- "### grand corpus types and counts\n",
161
- "nltk_counts = {}\n",
162
- "\n",
163
- "with open(\"data/nltk_grand_corpus_types_and_counts_5.txt\", \"r\", encoding = \"utf-8\") as f:\n",
164
- " for line in f.read().split(\"\\n\"):\n",
165
- " if len(line.split(\"\\t\")) == 2:\n",
166
- " word = line.split(\"\\t\")[0]\n",
167
- " count = line.split(\"\\t\")[1]\n",
168
- " nltk_counts[word] = count\n",
169
- " else:\n",
170
- " continue\n",
171
- "\n",
172
- "f.close() # closes connection to file\n",
173
- "\n",
174
- "print(len(nltk_counts))\n",
175
- "nltk_counts['which']"
176
- ]
177
- },
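One caveat on the cell above: splitting on tabs leaves each count as a string, so any arithmetic on the counts needs a cast. A variant that parses them as integers (assuming the counts in the file are whole numbers):

    nltk_counts = {}
    with open("data/nltk_grand_corpus_types_and_counts_5.txt", "r", encoding="utf-8") as f:
        for line in f.read().split("\n"):
            parts = line.split("\t")        # expected format: word<TAB>count
            if len(parts) == 2:
                nltk_counts[parts[0]] = int(parts[1])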
178
- {
179
- "cell_type": "code",
180
- "execution_count": null,
181
- "metadata": {},
182
- "outputs": [],
183
- "source": [
184
- "### Official list\n",
185
- "official_words = []\n",
186
- "\n",
187
- "with open(\"data/official_words_processed.txt\", \"r\", encoding = \"utf-8\") as f:\n",
188
- " for word in f.read().split(\"\\n\"):\n",
189
- " if len(word) > 0: # there's one blank entry at the start\n",
190
- " official_words.append(word)\n",
191
- "\n",
192
- "f.close() # closes connection to file\n",
193
- "\n",
194
- "print(len(official_words))\n",
195
- "official_words[:10]"
196
- ]
197
- },
198
- {
199
- "attachments": {},
200
- "cell_type": "markdown",
201
- "metadata": {},
202
- "source": [
203
- "## Wordle functions + Testing"
204
- ]
205
- },
206
- {
207
- "attachments": {},
208
- "cell_type": "markdown",
209
- "metadata": {},
210
- "source": [
211
- "### Testing `wordle_wizard()`"
212
- ]
213
- },
214
- {
215
- "cell_type": "code",
216
- "execution_count": 13,
217
- "metadata": {},
218
- "outputs": [
219
- {
220
- "name": "stdout",
221
- "output_type": "stream",
222
- "text": [
223
- "-----------------------------\n",
224
- "\n",
225
- "Guess 1: 'poesy'\n",
226
- "Letters in correct positions:\n",
227
- "\t[]\n",
228
- "\n",
229
- "Letters in incorrect positions:\n",
230
- "\t[('e', 2)]\n",
231
- "\n",
232
- "Letters to guess again:\n",
233
- "\t['e']\n",
234
- "\n",
235
- "Letters to not guess again:\n",
236
- "\t['o', 'p', 's', 'y']\n",
237
- "\n",
238
- "At this point:\n",
239
- "\t1905, 82.47% of total words have been eliminated, and\n",
240
- "\t405, 17.53% of total words remain possible.\n",
241
- "\n",
242
- "The top 40 potential next guesses are:\n",
243
- "\t[('alter', 100.0), ('later', 100.0), ('irate', 98.33), ('renal', 94.34), ('learn', 94.34), ('react', 91.3), ('crate', 91.3), ('trace', 91.3), ('cater', 91.3), ('trade', 88.34), ('leant', 88.3), ('heart', 88.13), ('earth', 88.13), ('hater', 88.13), ('aider', 86.22), ('alien', 86.18), ('crane', 85.68), ('tamer', 85.55), ('grate', 85.35), ('realm', 85.1), ('regal', 84.89), ('glare', 84.89), ('lager', 84.89), ('large', 84.89), ('eclat', 84.76), ('blare', 83.81), ('baler', 83.81), ('inter', 83.81), ('liner', 83.35), ('after', 82.43), ('flare', 81.93), ('feral', 81.93), ('delta', 81.81), ('dealt', 81.81), ('taker', 81.72), ('lathe', 81.6), ('water', 81.14), ('trice', 80.31), ('afire', 80.31), ('ramen', 79.93)]\n",
244
- "\n",
245
- "Words guessed so far:\n",
246
- "\t['poesy'].\n",
247
- "\n",
248
- "Next guess:\n",
249
- "\t'alter'\n",
250
- "\n",
251
- "-----------------------------\n",
252
- "\n",
253
- "Guess 2: 'alter'\n",
254
- "Letters in correct positions:\n",
255
- "\t[]\n",
256
- "\n",
257
- "Letters in incorrect positions:\n",
258
- "\t[('a', 0), ('e', 2), ('t', 2), ('e', 3), ('r', 4)]\n",
259
- "\n",
260
- "Letters to guess again:\n",
261
- "\t['a', 'e', 'r', 't']\n",
262
- "\n",
263
- "Letters to not guess again:\n",
264
- "\t['l', 'o', 'p', 's', 'y']\n",
265
- "\n",
266
- "At this point:\n",
267
- "\t2301, 99.61% of total words have been eliminated, and\n",
268
- "\t9, 0.39% of total words remain possible.\n",
269
- "\n",
270
- "All potential next guesses:\n",
271
- "\t[('irate', 100.0), ('crate', 70.91), ('trace', 70.91), ('react', 70.91), ('trade', 58.69), ('heart', 57.83), ('earth', 57.83), ('grate', 46.3), ('terra', 0.0)]\n",
272
- "\n",
273
- "Words guessed so far:\n",
274
- "\t['poesy', 'alter'].\n",
275
- "\n",
276
- "Next guess:\n",
277
- "\t'irate'\n",
278
- "\n",
279
- "-----------------------------\n",
280
- "\n",
281
- "Guess 3: 'irate'\n",
282
- "Letters in correct positions:\n",
283
- "\t[('r', 1), ('a', 2), ('t', 3), ('e', 4)]\n",
284
- "\n",
285
- "Letters in incorrect positions:\n",
286
- "\t[('a', 0), ('e', 2), ('t', 2), ('e', 3), ('r', 4)]\n",
287
- "\n",
288
- "Letters to guess again:\n",
289
- "\t['a', 'e', 'r', 't']\n",
290
- "\n",
291
- "Letters to not guess again:\n",
292
- "\t['i', 'l', 'o', 'p', 's', 'y']\n",
293
- "\n",
294
- "At this point:\n",
295
- "\t2308, 99.91% of total words have been eliminated, and\n",
296
- "\t2, 0.09% of total words remain possible.\n",
297
- "\n",
298
- "All potential next guesses:\n",
299
- "\t[('crate', 100.0), ('grate', 0.0)]\n",
300
- "\n",
301
- "Words guessed so far:\n",
302
- "\t['poesy', 'alter', 'irate'].\n",
303
- "\n",
304
- "Next guess:\n",
305
- "\t'crate'\n",
306
- "\n",
307
- "-----------------------------\n",
308
- "\n",
309
- "Guess 4: 'crate'\n",
310
- "Letters in correct positions:\n",
311
- "\t[('r', 1), ('a', 2), ('t', 3), ('e', 4)]\n",
312
- "\n",
313
- "Letters in incorrect positions:\n",
314
- "\t[('a', 0), ('e', 2), ('t', 2), ('e', 3), ('r', 4)]\n",
315
- "\n",
316
- "Letters to guess again:\n",
317
- "\t['a', 'e', 'r', 't']\n",
318
- "\n",
319
- "Letters to not guess again:\n",
320
- "\t['c', 'i', 'l', 'o', 'p', 's', 'y']\n",
321
- "\n",
322
- "At this point:\n",
323
- "\t2309, 99.96% of total words have been eliminated, and\n",
324
- "\t1, 0.04% of total words remain possible.\n",
325
- "\n",
326
- "The only remaining possible word is:\n",
327
- "\t'grate'\n",
328
- "\n",
329
- "Next guess:\n",
330
- "\t'grate'\n",
331
- "\n",
332
- "-----------------------------\n",
333
- "\n",
334
- "Guess 5: 'grate'\n",
335
- "\n",
336
- "Congratulations! The Wordle has been solved in 5 guesses!\n",
337
- "There were still 1 guesses remaining.\n",
338
- "\n",
339
- "The target word was 'grate'.\n",
340
- "\n",
341
- "-----------------------------\n"
342
- ]
343
- }
344
- ],
345
- "source": [
346
- "test_1 = wordle_wizard(word_list = official_words, max_guesses = 6, \n",
347
- " guess = \"paint\", target = \"force\",\n",
348
- " random_guess = True, random_target = True, \n",
349
- " verbose = True, drama = 0, return_stats = False, record = False)"
350
- ]
351
- },
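Since `return_stats = True` is documented below as returning a statistics dictionary instead of printing, the same run can also be captured programmatically — a sketch reusing the guess/target from the transcript above (`wordle_wizard` comes from the `wordle_functions` import at the top):

    stats = wordle_wizard(word_list = official_words, max_guesses = 6,
                          guess = "poesy", target = "grate",
                          random_guess = False, random_target = False,
                          verbose = False, drama = 0, return_stats = True, record = False)
    print(stats["first_guess"], stats["target_word"], stats["target_guessed"])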
352
- {
353
- "cell_type": "code",
354
- "execution_count": null,
355
- "metadata": {},
356
- "outputs": [],
357
- "source": [
358
- "for val in [False, True]:\n",
359
- " wordle_wizard(word_list = official_words, max_guesses = 6, \n",
360
- " guess = \"arose\", target = \"syrup\", bias = 'entropy', \n",
361
- " random_guess = False, random_target = False, \n",
362
- " verbose = val, drama = 0, return_stats = False, record = True)"
363
- ]
364
- },
365
- {
366
- "attachments": {},
367
- "cell_type": "markdown",
368
- "metadata": {},
369
- "source": [
370
- "### Testing on 3-letter words"
371
- ]
372
- },
373
- {
374
- "cell_type": "code",
375
- "execution_count": null,
376
- "metadata": {},
377
- "outputs": [],
378
- "source": [
379
- "### 3 letters\n",
380
- "words_3_letters = []\n",
381
- "words_3_types_counts = {}\n",
382
- "\n",
383
- "with open(\"data/nltk_grand_corpus_types_and_counts_3.txt\", \"r\", encoding = \"utf-8\") as f:\n",
384
- " for line in f.read().split(\"\\n\"):\n",
385
- " word_freq = line.split(\"\\t\")\n",
386
- " if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line\n",
387
- " word = word_freq[0]\n",
388
- " freq = word_freq[1]\n",
389
- " if word.isascii() == True:\n",
390
- " words_3_letters.append(word)\n",
391
- " words_3_types_counts[word] = freq\n",
392
- "\n",
393
- "f.close() # closes connection to file\n",
394
- "\n",
395
- "print(len(words_3_letters))\n",
396
- "print(words_3_letters[:5])\n",
397
- "words_3_types_counts['the']"
398
- ]
399
- },
400
- {
401
- "cell_type": "code",
402
- "execution_count": null,
403
- "metadata": {},
404
- "outputs": [],
405
- "source": [
406
- "for val in [False, True]:\n",
407
- " wordle_wizard(word_list = words_3_letters, max_guesses = 6, \n",
408
- " guess = \"the\", target = \"his\", bias = 'entropy', \n",
409
- " random_guess = False, random_target = False, \n",
410
- " verbose = val, drama = 0, return_stats = False, record = True)"
411
- ]
412
- },
413
- {
414
- "attachments": {},
415
- "cell_type": "markdown",
416
- "metadata": {},
417
- "source": [
418
- "### Testing on 4-letter words"
419
- ]
420
- },
421
- {
422
- "cell_type": "code",
423
- "execution_count": null,
424
- "metadata": {},
425
- "outputs": [],
426
- "source": [
427
- "### 4 letters\n",
428
- "words_4_letters = []\n",
429
- "words_4_types_counts = {}\n",
430
- "\n",
431
- "with open(\"data/nltk_grand_corpus_types_and_counts_4.txt\", \"r\", encoding = \"utf-8\") as f:\n",
432
- " for line in f.read().split(\"\\n\"):\n",
433
- " word_freq = line.split(\"\\t\")\n",
434
- " if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line\n",
435
- " word = word_freq[0]\n",
436
- " freq = word_freq[1]\n",
437
- " if word.isascii() == True:\n",
438
- " words_4_letters.append(word)\n",
439
- " words_4_types_counts[word] = freq\n",
440
- "\n",
441
- "f.close() # closes connection to file\n",
442
- "\n",
443
- "print(len(words_4_letters))\n",
444
- "print(words_4_letters[:5])\n",
445
- "words_4_types_counts['that']"
446
- ]
447
- },
448
- {
449
- "cell_type": "code",
450
- "execution_count": null,
451
- "metadata": {},
452
- "outputs": [],
453
- "source": [
454
- "for val in [False, True]:\n",
455
- " wordle_wizard(word_list = words_4_letters, max_guesses = 6, \n",
456
- " guess = \"have\", target = \"this\", bias = 'entropy', \n",
457
- " random_guess = False, random_target = False, \n",
458
- " verbose = val, drama = 0, return_stats = False, record = True)"
459
- ]
460
- },
461
- {
462
- "attachments": {},
463
- "cell_type": "markdown",
464
- "metadata": {},
465
- "source": [
466
- "### Testing on 6-letter words"
467
- ]
468
- },
469
- {
470
- "cell_type": "code",
471
- "execution_count": null,
472
- "metadata": {},
473
- "outputs": [],
474
- "source": [
475
- "### 6 letters\n",
476
- "words_6_letters = []\n",
477
- "words_6_types_counts = {}\n",
478
- "\n",
479
- "with open(\"data/nltk_grand_corpus_types_and_counts_6.txt\", \"r\", encoding = \"utf-8\") as f:\n",
480
- " for line in f.read().split(\"\\n\"):\n",
481
- " word_freq = line.split(\"\\t\")\n",
482
- " if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line\n",
483
- " word = word_freq[0]\n",
484
- " freq = word_freq[1]\n",
485
- " if word.isascii() == True:\n",
486
- " words_6_letters.append(word)\n",
487
- " words_6_types_counts[word] = freq\n",
488
- "\n",
489
- "f.close() # closes connection to file\n",
490
- "\n",
491
- "print(len(words_6_letters))\n",
492
- "print(words_6_letters[:5])\n",
493
- "words_6_types_counts[words_6_letters[0]]"
494
- ]
495
- },
496
- {
497
- "cell_type": "code",
498
- "execution_count": null,
499
- "metadata": {},
500
- "outputs": [],
501
- "source": [
502
- "for val in [False, True]:\n",
503
- " wordle_wizard(word_list = words_6_letters, max_guesses = 6, \n",
504
- " guess = \"little\", target = \"before\", bias = 'entropy', \n",
505
- " random_guess = False, random_target = False, \n",
506
- " verbose = val, drama = 0, return_stats = False, record = True)"
507
- ]
508
- },
509
- {
510
- "attachments": {},
511
- "cell_type": "markdown",
512
- "metadata": {},
513
- "source": [
514
- "### Testing on 7-letter words"
515
- ]
516
- },
517
- {
518
- "cell_type": "code",
519
- "execution_count": null,
520
- "metadata": {},
521
- "outputs": [],
522
- "source": [
523
- "### 7 letters\n",
524
- "words_7_letters = []\n",
525
- "words_7_types_counts = {}\n",
526
- "\n",
527
- "with open(\"data/nltk_grand_corpus_types_and_counts_7.txt\", \"r\", encoding = \"utf-8\") as f:\n",
528
- " for line in f.read().split(\"\\n\"):\n",
529
- " word_freq = line.split(\"\\t\")\n",
530
- " if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line\n",
531
- " word = word_freq[0]\n",
532
- " freq = word_freq[1]\n",
533
- " if word.isascii() == True:\n",
534
- " words_7_letters.append(word)\n",
535
- " words_7_types_counts[word] = freq\n",
536
- "\n",
537
- "f.close() # closes connection to file\n",
538
- "\n",
539
- "print(len(words_7_letters))\n",
540
- "print(words_7_letters[:5])\n",
541
- "words_7_types_counts[words_7_letters[0]]"
542
- ]
543
- },
544
- {
545
- "cell_type": "code",
546
- "execution_count": null,
547
- "metadata": {},
548
- "outputs": [],
549
- "source": [
550
- "for val in [False, True]:\n",
551
- " wordle_wizard(word_list = words_7_letters, max_guesses = 6, \n",
552
- " guess = \"because\", target = \"through\", bias = 'entropy', \n",
553
- " random_guess = True, random_target = True, \n",
554
- " verbose = val, drama = 0, return_stats = False, record = True)"
555
- ]
556
- },
557
- {
558
- "attachments": {},
559
- "cell_type": "markdown",
560
- "metadata": {},
561
- "source": [
562
- "### Testing on 8-letter words"
563
- ]
564
- },
565
- {
566
- "cell_type": "code",
567
- "execution_count": null,
568
- "metadata": {},
569
- "outputs": [],
570
- "source": [
571
- "### 8 letters\n",
572
- "words_8_letters = []\n",
573
- "words_8_types_counts = {}\n",
574
- "\n",
575
- "with open(\"data/nltk_grand_corpus_types_and_counts_8.txt\", \"r\", encoding = \"utf-8\") as f:\n",
576
- " for line in f.read().split(\"\\n\"):\n",
577
- " word_freq = line.split(\"\\t\")\n",
578
- " if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line\n",
579
- " word = word_freq[0]\n",
580
- " freq = word_freq[1]\n",
581
- " if word.isascii() == True:\n",
582
- " words_8_letters.append(word)\n",
583
- " words_8_types_counts[word] = freq\n",
584
- "\n",
585
- "f.close() # closes connection to file\n",
586
- "\n",
587
- "print(len(words_8_letters))\n",
588
- "print(words_8_letters[:5])\n",
589
- "words_8_types_counts[words_8_letters[0]]"
590
- ]
591
- },
592
- {
593
- "cell_type": "code",
594
- "execution_count": null,
595
- "metadata": {},
596
- "outputs": [],
597
- "source": [
598
- "for val in [False, True]:\n",
599
- " wordle_wizard(word_list = words_8_letters, max_guesses = 6, \n",
600
- " guess = \"trinidad\", target = \"together\", bias = 'entropy', \n",
601
- " random_guess = False, random_target = False, \n",
602
- " verbose = val, drama = 0, return_stats = False, record = True)"
603
- ]
604
- },
605
- {
606
- "attachments": {},
607
- "cell_type": "markdown",
608
- "metadata": {},
609
- "source": [
610
- "### Testing on 9-letter words"
611
- ]
612
- },
613
- {
614
- "cell_type": "code",
615
- "execution_count": null,
616
- "metadata": {},
617
- "outputs": [],
618
- "source": [
619
- "### 9 letters\n",
620
- "words_9_letters = []\n",
621
- "words_9_types_counts = {}\n",
622
- "\n",
623
- "with open(\"data/nltk_grand_corpus_types_and_counts_9.txt\", \"r\", encoding = \"utf-8\") as f:\n",
624
- " for line in f.read().split(\"\\n\"):\n",
625
- " word_freq = line.split(\"\\t\")\n",
626
- " if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line\n",
627
- " word = word_freq[0]\n",
628
- " freq = word_freq[1]\n",
629
- " if word.isascii() == True:\n",
630
- " words_9_letters.append(word)\n",
631
- " words_9_types_counts[word] = freq\n",
632
- "\n",
633
- "f.close() # closes connection to file\n",
634
- "\n",
635
- "print(len(words_9_letters))\n",
636
- "print(words_9_letters[:5])\n",
637
- "words_9_types_counts[words_9_letters[0]]"
638
- ]
639
- },
640
- {
641
- "cell_type": "code",
642
- "execution_count": null,
643
- "metadata": {},
644
- "outputs": [],
645
- "source": [
646
- "for val in [False, True]:\n",
647
- " wordle_wizard(word_list = words_9_letters, max_guesses = 6, \n",
648
- " guess = \"something\", target = \"character\", bias = 'entropy', \n",
649
- " random_guess = True, random_target = False, \n",
650
- " verbose = val, drama = 0, return_stats = False, record = True)"
651
- ]
652
- },
653
- {
654
- "attachments": {},
655
- "cell_type": "markdown",
656
- "metadata": {},
657
- "source": [
658
- "### Testing on 10-letter words"
659
- ]
660
- },
661
- {
662
- "cell_type": "code",
663
- "execution_count": null,
664
- "metadata": {},
665
- "outputs": [],
666
- "source": [
667
- "### 10 letters\n",
668
- "words_10_letters = []\n",
669
- "words_10_types_counts = {}\n",
670
- "\n",
671
- "with open(\"data/nltk_grand_corpus_types_and_counts_10.txt\", \"r\", encoding = \"utf-8\") as f:\n",
672
- " for line in f.read().split(\"\\n\"):\n",
673
- " word_freq = line.split(\"\\t\")\n",
674
- " if len(word_freq) == 2: # how many items are in each line, NOT the len of the word in the line\n",
675
- " word = word_freq[0]\n",
676
- " freq = word_freq[1]\n",
677
- " if word.isascii() == True:\n",
678
- " words_10_letters.append(word)\n",
679
- " words_10_types_counts[word] = freq\n",
680
- "\n",
681
- "f.close() # closes connection to file\n",
682
- "\n",
683
- "print(len(words_10_letters))\n",
684
- "print(words_10_letters[:5])\n",
685
- "words_10_types_counts[words_10_letters[0]]"
686
- ]
687
- },
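The per-length loading cells above all repeat the same routine; a sketch of a helper that factors out the pattern, assuming the `data/nltk_grand_corpus_types_and_counts_<n>.txt` naming scheme used throughout:

    def load_types_counts(n: int):
        """Load the n-letter word list and its word -> frequency map."""
        words, counts = [], {}
        with open(f"data/nltk_grand_corpus_types_and_counts_{n}.txt", "r", encoding="utf-8") as f:
            for line in f.read().split("\n"):
                parts = line.split("\t")        # word<TAB>frequency
                if len(parts) == 2 and parts[0].isascii():
                    words.append(parts[0])
                    counts[parts[0]] = parts[1]
        return words, counts

    words_10_letters, words_10_types_counts = load_types_counts(10)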
688
- {
689
- "cell_type": "code",
690
- "execution_count": null,
691
- "metadata": {},
692
- "outputs": [],
693
- "source": [
694
- "for val in [False, True]:\n",
695
- " wordle_wizard(word_list = words_10_letters, max_guesses = 6, \n",
696
- " guess = \"characters\", target = \"theologies\", bias = 'entropy', \n",
697
- " random_guess = True, random_target = False, \n",
698
- " verbose = val, drama = 0, return_stats = False, record = True)"
699
- ]
700
- },
701
- {
702
- "cell_type": "code",
703
- "execution_count": null,
704
- "metadata": {},
705
- "outputs": [],
706
- "source": [
707
- "wordle_wizard(word_list = official_words, max_guesses = 5, \n",
708
- " guess = \"quote\", target = \"silly\", bias = 'entropy', \n",
709
- " random_guess = False, random_target = False, \n",
710
- " verbose = True, drama = 0, return_stats = False, record = False)"
711
- ]
712
- },
713
- {
714
- "attachments": {},
715
- "cell_type": "markdown",
716
- "metadata": {},
717
- "source": [
718
- "### `compare_wordle()` testing"
719
- ]
720
- },
721
- {
722
- "cell_type": "code",
723
- "execution_count": null,
724
- "metadata": {},
725
- "outputs": [],
726
- "source": [
727
- "df = pd.read_csv(\"compared_data/wordle_humans - Sheet1.csv\")\n",
728
- "print(df.shape)\n",
729
- "df"
730
- ]
731
- },
732
- {
733
- "cell_type": "code",
734
- "execution_count": null,
735
- "metadata": {},
736
- "outputs": [],
737
- "source": [
738
- "df = pd.read_csv(\"compared_data/wordle_humans - Sheet1.csv\")\n",
739
- "df\n",
740
- "convert_row(df, 37)"
741
- ]
742
- },
743
- {
744
- "cell_type": "code",
745
- "execution_count": null,
746
- "metadata": {},
747
- "outputs": [],
748
- "source": [
749
- "### TESTING DF INTERPRETATION\n",
750
- "\n",
751
- "df = pd.read_csv(\"compared_data/wordle_humans - Sheet1.csv\")\n",
752
- "\n",
753
- "row = 37\n",
754
- "\n",
755
- "print(convert_row(df, row))\n",
756
- "player = convert_row(df, row)[0]\n",
757
- "target_word = convert_row(df, row)[1]\n",
758
- "guess_list = convert_row(df, row)[2]\n",
759
- "\n",
760
- "compare_wordle(word_list = official_words, max_guesses = 6, \n",
761
- " guess_list = guess_list, player = player, target = target_word,\n",
762
- " verbose = False, return_stats = True, record = False)"
763
- ]
764
- },
765
- {
766
- "attachments": {},
767
- "cell_type": "markdown",
768
- "metadata": {},
769
- "source": [
770
- "## Comparing player solutions against wizard solutions"
771
- ]
772
- },
773
- {
774
- "cell_type": "code",
775
- "execution_count": null,
776
- "metadata": {},
777
- "outputs": [],
778
- "source": [
779
- "def create_compared_df(player_df, to_csv: bool = False, show_shapes: bool = False):\n",
780
- " \"\"\"\n",
781
- " Creates master df of player wordle scores compared to how wordle_wizard would perform on the same puzzles\n",
782
- "\n",
783
- " Parameters:\n",
784
- " -----\n",
785
- " `player_df`: pandas DataFrame object\n",
786
- " df of player scores of wordle puzzles\n",
787
- " `to_csv`: bool\n",
788
- " If True, writes returned df to csv\n",
789
- " `show_shapes`: bool\n",
790
- " If True, prints shape of new df before and after deleting duplicate rows (created by wordle_wizard running the same puzzles multiple times)\n",
791
- " \n",
792
- " Returns:\n",
793
- " -----\n",
794
- " `df_master`: pandas DataFrame object\n",
795
- " df of player scores and wordle_wizard scores of wordle puzzles\n",
796
- " \"\"\"\n",
797
- "\n",
798
- " stats_master = {}\n",
799
- " excepts = []\n",
800
- " for row in player_df.index:\n",
801
- " player = convert_row(player_df, row)[0]\n",
802
- " target_word = convert_row(player_df, row)[1]\n",
803
- " guess_list = convert_row(player_df, row)[2]\n",
804
- " try:\n",
805
- " complete = compare_wordle(word_list = official_words, max_guesses = 6, \n",
806
- " guess_list = guess_list, player = player, target = target_word,\n",
807
- " verbose = True, return_stats = True, record = False)\n",
808
- " for metric, results in complete.items():\n",
809
- " if metric in stats_master:\n",
810
- " for result in results:\n",
811
- " stats_master[metric].append(result)\n",
812
- " else:\n",
813
- " stats_master[metric] = []\n",
814
- " for result in results:\n",
815
- " stats_master[metric].append(result)\n",
816
- " except AttributeError:\n",
818
- " excepts.append(guess_list)\n",
819
- "\n",
820
- " df_master = pd.DataFrame(stats_master)\n",
821
- " print(df_master.columns.tolist())\n",
822
- "\n",
823
- " # Re-organizing columns to a more logical order (for viewing)\n",
824
- " df_master = df_master[['first_guess', 'target_word', 'player', 'num_guesses', 'expected_guesses', 'luck', 'first_guess_vowels', 'first_guess_consonants',\n",
825
- " 'target_vowels', 'target_consonants', 'first_guess_entropy', 'target_entropy',\n",
826
- " 'target_guessed', 'mid_guesses_avg_vows', 'mid_guesses_avg_cons', 'avg_perf_letters',\n",
827
- " 'avg_wrong_pos_letters', 'avg_wrong_letters', 'avg_remaining', 'avg_intermediate_guess_entropy',\n",
828
- " 'valid_success']]\n",
829
- "\n",
830
- " # print(excepts)\n",
831
- " if show_shapes == True:\n",
832
- " print(df_master.shape) # check shape before deleting dups\n",
833
- "\n",
834
- " # Delete duplicate rows (some created by process)\n",
835
- " df_master.drop_duplicates(inplace = True)\n",
836
- " \n",
837
- " if to_csv == True:\n",
838
- " df_master.to_csv('compared_data/players_compared.csv') # write new data to csv\n",
839
- " \n",
840
- " if show_shapes == True:\n",
841
- " print(df_master.shape) # check shape after deleting dups\n",
842
- " \n",
843
- " return df_master.reset_index().drop(columns = \"index\")"
844
- ]
845
- },
846
- {
847
- "cell_type": "code",
848
- "execution_count": null,
849
- "metadata": {},
850
- "outputs": [],
851
- "source": [
852
- "test_word = \"test 1 \"\n",
853
- "test_word.strip().lower()"
854
- ]
855
- },
856
- {
857
- "cell_type": "code",
858
- "execution_count": null,
859
- "metadata": {},
860
- "outputs": [],
861
- "source": [
862
- "df = pd.read_csv(\"compared_data/wordle_humans - Sheet1.csv\")\n",
863
- "\n",
864
- "df_master = create_compared_df(df, to_csv = True, show_shapes = True)\n",
865
- "df_master"
866
- ]
867
- },
868
- {
869
- "cell_type": "code",
870
- "execution_count": null,
871
- "metadata": {},
872
- "outputs": [],
873
- "source": [
874
- "print(df_master.query(\"player == 'aidan'\")['num_guesses'].mean())\n",
875
- "print(df_master.query(\"player == 'aidan'\").shape)\n",
876
- "df_master.query(\"player == 'aidan'\").head()"
877
- ]
878
- },
879
- {
880
- "cell_type": "code",
881
- "execution_count": null,
882
- "metadata": {},
883
- "outputs": [],
884
- "source": [
885
- "print(df_master.query(\"player == 'dad'\")['num_guesses'].mean())\n",
886
- "print(df_master.query(\"player == 'dad'\").shape)\n",
887
- "df_master.query(\"player == 'dad'\").head()"
888
- ]
889
- },
890
- {
891
- "cell_type": "code",
892
- "execution_count": null,
893
- "metadata": {},
894
- "outputs": [],
895
- "source": [
896
- "print(df_master.query(\"player == 'diane'\")['num_guesses'].mean())\n",
897
- "print(df_master.query(\"player == 'diane'\").shape)\n",
898
- "df_master.query(\"player == 'diane'\").head()"
899
- ]
900
- },
901
- {
902
- "cell_type": "code",
903
- "execution_count": null,
904
- "metadata": {},
905
- "outputs": [],
906
- "source": [
907
- "print(df_master.query(\"player == 'wizard'\")['num_guesses'].mean())\n",
908
- "print(df_master.query(\"player == 'wizard'\").shape)\n",
909
- "df_master.query(\"player == 'wizard'\").head(40)"
910
- ]
911
- },
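The four per-player summaries above can be collapsed into a single groupby — a sketch assuming `df_master` as returned by `create_compared_df`:

    # mean guesses and number of games per player, in one pass
    df_master.groupby("player")["num_guesses"].agg(["mean", "count"])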
912
- {
913
- "attachments": {},
914
- "cell_type": "markdown",
915
- "metadata": {},
916
- "source": [
917
- "## Prefix/Suffix bias"
918
- ]
919
- },
920
- {
921
- "cell_type": "code",
922
- "execution_count": null,
923
- "metadata": {},
924
- "outputs": [],
925
- "source": [
926
- "def get_gram_freq(word_list: list, letters_length: int = 2, position: str = \"start\", search: str = None):\n",
927
- " \"\"\"\n",
928
- " Given a word list, a selected number of letters, a selected word position to start from (\"start\" or \"end\"),\n",
929
- " and an optional gram to search for within the list, this function builds a frequency distribution of all n-grams\n",
930
- " in the passed word list and returns it in descending order of count.\n",
931
- "\n",
932
- " Parameters:\n",
933
- " ------\n",
934
- " `word_list`: list\n",
935
- " list of words of the same length\n",
936
- " `letters_length`: int\n",
937
- " number of letters in succession. Size/length of \"gram\". Must be between 1 and length of words in word list\n",
938
- " `position`: str\n",
939
- " Whether to start the gram from the start of the word (like a prefix) or the end of the word (like a suffix)\n",
940
- " `search`: str\n",
941
- " If != None, string of characters to search for within the generated list. If string not found in list, function will print an error message.\n",
942
- "\n",
943
- " Returns:\n",
944
- " ------\n",
945
- " `tup`: tuple\n",
946
- " If search != None, will return a tuple with the passed search criteria, and its count\n",
947
- " `sorted_gram_dist`: list\n",
948
- " List of tuples in the form of (gram, count) for each combination of the gram size in the pass word_list\n",
949
- " \"\"\"\n",
950
- "\n",
951
- " gram_freq_dist = {}\n",
952
- "\n",
953
- " for word in word_list:\n",
954
- " if position == \"start\":\n",
955
- " gram = word[:letters_length] # first `letters_length` letters\n",
956
- " if position == \"end\":\n",
957
- " gram = word[-(letters_length):] # last `letters_length` letters\n",
958
- "\n",
959
- " if gram not in gram_freq_dist:\n",
960
- " gram_freq_dist[gram] = 1\n",
961
- " else:\n",
962
- " gram_freq_dist[gram] += 1\n",
963
- "\n",
964
- " sorted_gram_dist = sorted(gram_freq_dist.items(), key = operator.itemgetter(1), reverse = True)\n",
965
- "\n",
966
- " if search:\n",
967
- " nos = []\n",
968
- " for tup in sorted_gram_dist:\n",
969
- " if tup[0] == search:\n",
970
- " return tup\n",
971
- " else:\n",
972
- " nos.append(\"not here\")\n",
973
- " \n",
974
- " if len(nos) == len(sorted_gram_dist):\n",
975
- " print (\"Search criteria not found in list. Please enter a gram from within the list.\")\n",
976
- " else:\n",
977
- " return sorted_gram_dist\n",
978
- "\n",
979
- "get_gram_freq(word_list = official_words, letters_length = 2, position = \"start\", search = None)[:10]"
980
- ]
981
- },
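The tallying half of `get_gram_freq` maps directly onto `collections.Counter`; a sketch producing the same start-gram distribution with the standard library:

    from collections import Counter

    # frequency of 2-letter prefixes across the word list, most common first
    Counter(word[:2] for word in official_words).most_common(10)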
982
- {
983
- "cell_type": "code",
984
- "execution_count": null,
985
- "metadata": {},
986
- "outputs": [],
987
- "source": [
988
- "test_starts = get_gram_freq(word_list = official_words, letters_length = 2, position = \"start\", search = None)[:10]\n",
989
- "test_ends = get_gram_freq(word_list = official_words, letters_length = 2, position = \"end\", search = None)[:10]\n",
990
- "\n",
991
- "test_words = official_words\n",
992
- "\n",
993
- "for start_gram, start_count in test_starts:\n",
994
- " for end_gram, end_count in test_ends:\n",
995
- " for word in [\"natal\", 'fatal']:\n",
996
- " # for word in test_words:\n",
997
- " if word[:2] == start_gram and word[-2:] == end_gram:\n",
998
- " print (word, start_gram, end_gram)"
999
- ]
1000
- },
1001
- {
1002
- "cell_type": "code",
1003
- "execution_count": null,
1004
- "metadata": {},
1005
- "outputs": [],
1006
- "source": [
1007
- "def wordle_wizard(word_list: list, max_guesses: int = None, \n",
1008
- " guess: str = None, target: str = None,\n",
1009
- " random_guess: bool = False, random_target: bool = False, \n",
1010
- " verbose: bool = False, drama: float = None, \n",
1011
- " return_stats: bool = False, record: bool = False, hf_mod: bool = True):\n",
1012
- " \"\"\"\n",
1013
- " Mimicking the popular web game, this function matches a current word to a target word automatically, in the most statistically optimal way possible.\n",
1014
- "\n",
1015
- " Parameters:\n",
1016
- " ------\n",
1017
- " `word_list`: list\n",
1018
- " list of valid words to be considered\n",
1019
- " `guess`: str\n",
1020
- " a string -- must be the same length as `target`\n",
1021
- " `target`: str\n",
1022
- " a string -- must be the same length as `guess`\n",
1023
- " `max_guesses`: int\n",
1024
- " the maximum number of attempts allowed to solve the Wordle\n",
1025
- " `random_guess`: bool\n",
1026
- " if True, randomly chooses a starting word from all words within `word_list`. If False, passed starting word must be used instead\n",
1027
- " `random_target`: bool\n",
1028
- " if True, randomly chooses a target word from all words within `word_list`. If False, passed target word must be used instead\n",
1029
- " `verbose`: bool\n",
1030
- " if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.\n",
1031
- " `drama`: float or int\n",
1032
- " if int provided, each guess' output is delayed by that number of seconds, else each output is shown as quickly as possible. For ~dRaMaTiC eFfEcT~\n",
1033
- " `return_stats`: bool\n",
1034
- " if True, prints nothing and returns a dictionary of various statistics about the function's performance trying to solve the puzzle\n",
1035
- " `record`: bool\n",
1036
- " if True, creates a .txt file with the same information printed according to the indicated verbosity\n",
1037
- "\n",
1038
- " Returns:\n",
1039
- " ------\n",
1040
- " `stats_dict`: dict\n",
1041
- " dictionary containing various statistics about the function's performance trying to solve the puzzle\n",
1042
- " \"\"\"\n",
1043
- "\n",
1044
- " guess = guess.lower()\n",
1045
- " target = target.lower()\n",
1046
- "\n",
1047
- " sugg_words = []\n",
1048
- "\n",
1049
- " for i in range(0, 20):\n",
1050
- " ran_int = random.randint(0, len(word_list) - 1)\n",
1051
- " word = word_list[ran_int]\n",
1052
- " sugg_words.append(word)\n",
1053
- "\n",
1054
- " if guess not in word_list:\n",
1055
- " print (\"Guess word not in passed word list.\\nOnly words within the given word list are valid.\")\n",
1056
- " print (f\"Here are some examples of valid words from the passed word list.\\n\\t{sugg_words[:10]}\")\n",
1057
- " return None\n",
1058
- " \n",
1059
- " if target not in word_list:\n",
1060
- " print (\"Target word not in passed word list.\\nOnly words within the given word list are valid.\")\n",
1061
- " print (f\"Here are some examples of valid words from the passed word list.\\n\\t{sugg_words[-10:]}\")\n",
1062
- " return None\n",
1063
- "\n",
1064
- " if random_guess == True:\n",
1065
- " randomint_guess = random.randint(0, len(word_list) - 1)\n",
1066
- " guess = word_list[randomint_guess]\n",
1067
- "\n",
1068
- " if random_target == True:\n",
1069
- " randomint_target = random.randint(0, len(word_list) - 1)\n",
1070
- " target = word_list[randomint_target]\n",
1071
- "\n",
1072
- " stats_dict = {}\n",
1073
- " stats_dict['first_guess'] = guess\n",
1074
- " stats_dict['target_word'] = target\n",
1075
- " stats_dict['first_guess_vowels'] = float(count_vows_cons(guess, y_vow = True)['vows'])\n",
1076
- " stats_dict['first_guess_consonants'] = float(count_vows_cons(guess, y_vow = True)['cons'])\n",
1077
- " stats_dict['target_vowels'] = float(count_vows_cons(target, y_vow = True)['vows'])\n",
1078
- " stats_dict['target_consonants'] = float(count_vows_cons(target, y_vow = True)['cons'])\n",
1079
- " \n",
1080
- " # get entropy of the first guess word and target word in the entire word_list\n",
1081
- " for tup in get_word_entropy(word_list, word_list, normalized = True):\n",
1082
- " if tup[0] == guess:\n",
1083
- " stats_dict['first_guess_entropy'] = tup[1]\n",
1084
- " if tup[0] == target:\n",
1085
- " stats_dict['target_entropy'] = tup[1]\n",
1086
- "\n",
1087
- " guess_entropies = []\n",
1088
- " guess_entropies.append(stats_dict['first_guess_entropy'])\n",
1089
- "\n",
1090
- " # luck_guess_1 = round(1 - ((1 / len(word_list)) * guess_entropies[0] / 100), 2) * 100\n",
1091
- "\n",
1092
- " english_alphabet = \"abcdefghijklmnopqrstuvwxyz\"\n",
1093
- "\n",
1094
- " word_list_sorted_counts = get_letter_counts(english_alphabet, word_list, sort = \"descending\")\n",
1095
- " \n",
1096
- " wordlen = len(guess)\n",
1097
- " letter_positions = set(i for i in range(0, wordlen))\n",
1098
- "\n",
1099
- " guess_set = set()\n",
1100
- " perfect_dict = {}\n",
1101
- " wrong_pos_dict = {}\n",
1102
- " wrong_pos_set = set()\n",
1103
- " dont_guess_again = set()\n",
1104
- "\n",
1105
- " guessed_words = [] # running set of guessed words\n",
1106
- " guess_num = 0 # baseline for variable\n",
1107
- " dont_guess_words = set()\n",
1108
- " incorrect_positions = []\n",
1109
- " reduction_per_guess = []\n",
1110
- "\n",
1111
- " if max_guesses == None: # if no value is passed, default is len(guess)\n",
1112
- " max_guesses = wordlen\n",
1113
- " else: # else it is the value passed\n",
1114
- " max_guesses = max_guesses\n",
1115
- "\n",
1116
- " perfect_letts_per_guess = []\n",
1117
- " wrong_pos_per_guess = []\n",
1118
- " wrong_letts_per_guess = []\n",
1119
- "\n",
1120
- " record_list = []\n",
1121
- "\n",
1122
- " while guess: # while there is any guess -- there are conditions to break it at the bottom\n",
1123
- "\n",
1124
- " guess_num += 1\n",
1125
- "\n",
1126
- " guessed_words.append(guess)\n",
1127
- "\n",
1128
- " if drama:\n",
1129
- " time.sleep(drama)\n",
1130
- "\n",
1131
- " # guess_num += 1 # each time the guess is processed\n",
1132
- " if return_stats == False:\n",
1133
- " if guess_num == 1:\n",
1134
- " print(\"-----------------------------\\n\")\n",
1135
- " record_list.append(\"-----------------------------\\n\")\n",
1136
- " \n",
1137
- " if return_stats == False:\n",
1138
- " print(f\"Guess {guess_num}: '{guess}'\")\n",
1139
- " record_list.append(f\"Guess {guess_num}: '{guess}'\")\n",
1140
- "\n",
1141
- " if guess == target:\n",
1142
- " stats_dict['target_guessed'] = True\n",
1143
- " if return_stats == False:\n",
1144
- " if guess_num == 1:\n",
1145
- " print(f\"Congratulations! The Wordle has been solved in {guess_num} guess, that's amazingly lucky!\")\n",
1146
- " print(f\"The target word was {target}\")\n",
1147
- " record_list.append(f\"Congratulations! The Wordle has been solved in {guess_num} guess, that's amazingly lucky!\")\n",
1148
- " record_list.append(f\"The target word was '{target}'.\")\n",
1149
- " perfect_letts_per_guess.append(5)\n",
1150
- " wrong_pos_per_guess.append(0)\n",
1151
- " wrong_letts_per_guess.append(0)\n",
1152
- " break\n",
1153
- "\n",
1154
- " guess_set = set()\n",
1155
- " wrong_pos_set = set()\n",
1156
- "\n",
1157
- " #### Step 2 -- ALL PERFECT\n",
1158
- " for i in letter_positions: # number of letters in each word (current word and target word)\n",
1159
- " guess_set.add(guess[i])\n",
1160
- "\n",
1161
- " if guess[i] not in perfect_dict:\n",
1162
- " perfect_dict[guess[i]] = set()\n",
1163
- " if guess[i] not in wrong_pos_dict:\n",
1164
- " wrong_pos_dict[guess[i]] = set()\n",
1165
- "\n",
1166
- " ### EVALUATE CURRENT GUESS\n",
1167
- " if guess[i] == target[i]: # letter == correct and position == correct\n",
1168
- " perfect_dict[guess[i]].add(i)\n",
1169
- "\n",
1170
- " if (guess[i] != target[i] and guess[i] in target): # letter == correct and position != correct\n",
1171
- " wrong_pos_dict[guess[i]].add(i)\n",
1172
- " wrong_pos_set.add(guess[i])\n",
1173
- "\n",
1174
- " if guess[i] not in target: # if letter is not relevant at all\n",
1175
- " dont_guess_again.add(guess[i])\n",
1176
- "\n",
1177
- " #### Step 3 -- ALL PERFECT\n",
1178
- " next_letters = set()\n",
1179
- " for letter, positions in perfect_dict.items():\n",
1180
- " if len(positions) > 0:\n",
1181
- " next_letters.add(letter)\n",
1182
- "\n",
1183
- " for letter, positions in wrong_pos_dict.items():\n",
1184
- " if len(positions) > 0:\n",
1185
- " next_letters.add(letter)\n",
1186
- "\n",
1187
- " #### List of tuples of correct letter positions in new valid words. Eg: [('e', 2), ('a', 3)]\n",
1188
- " perfect_letters = []\n",
1189
- " for letter, positions in perfect_dict.items():\n",
1190
- " for pos in positions:\n",
1191
- " if len(positions) > 0:\n",
1192
- " perfect_letters.append((letter, pos))\n",
1193
- "\n",
1194
- " #### all words that have correct letters in same spots\n",
1195
- " words_matching_correct_all = []\n",
1196
- " for word in word_list:\n",
1197
- " word_set = set()\n",
1198
- " for letter, pos in perfect_letters:\n",
1199
- " if pos < len(word):\n",
1200
- " if word[pos] == letter:\n",
1201
- " words_matching_correct_all.append(word)\n",
1202
- "\n",
1203
- " #### excluding words with letters in known incorrect positions\n",
1204
- " for letter, positions in wrong_pos_dict.items():\n",
1205
- " for pos in positions:\n",
1206
- " if len(positions) > 0:\n",
1207
- " if (letter, pos) not in incorrect_positions:\n",
1208
- " incorrect_positions.append((letter, pos))\n",
1209
- "\n",
1210
- " # sorting lists of tuples just to make them look nice in the printout\n",
1211
- " incorrect_positions = sorted(incorrect_positions, key = operator.itemgetter(1), reverse = False)\n",
1212
- " perfect_letters = sorted(perfect_letters, key = operator.itemgetter(1), reverse = False)\n",
1213
- "\n",
1214
- " #### all words that have correct letters in incorrect spots -- so they can be excluded efficiently\n",
1215
- " \n",
1216
- " # print(incorrect_positions)\n",
1217
- " \n",
1218
- " for word in word_list:\n",
1219
- " word_set = set()\n",
1220
- " for letter, pos in incorrect_positions:\n",
1221
- " if pos < len(word):\n",
1222
- " if word[pos] == letter:\n",
1223
- " dont_guess_words.add(word)\n",
1230
- "\n",
1231
- " for bad_letter in dont_guess_again:\n",
1232
- " for word in word_list:\n",
1233
- " if (bad_letter in word and word not in dont_guess_words):\n",
1234
- " dont_guess_words.add(word)\n",
1235
- "\n",
1236
- " if return_stats == False:\n",
1237
- " if verbose == True:\n",
1238
- " print(f\"Letters in correct positions:\\n\\t{perfect_letters}\\n\")\n",
1239
- " print(f\"Letters in incorrect positions:\\n\\t{incorrect_positions}\\n\")\n",
1240
- " print (f\"Letters to guess again:\\n\\t{sorted(list(next_letters), reverse = False)}\\n\")\n",
1241
- " print(f\"Letters to not guess again:\\n\\t{sorted(list(dont_guess_again), reverse = False)}\\n\") # works\n",
1242
- " \n",
1243
- " if len(perfect_letters) == 0:\n",
1244
- " record_list.append(f\"Letters in correct positions: None\\n\")\n",
1245
- " else:\n",
1246
- " record_list.append(f\"Letters in correct positions: {perfect_letters}\\n\")\n",
1247
- " \n",
1248
- " if len(incorrect_positions) == 0:\n",
1249
- " record_list.append(f\"Letters in incorrect positions: None\\n\")\n",
1250
- " else:\n",
1251
- " record_list.append(f\"Letters in incorrect positions: {incorrect_positions}\\n\")\n",
1252
- " \n",
1253
- " if len(next_letters) == 0:\n",
1254
- " record_list.append(f\"Letters to guess again: None\\n\")\n",
1255
- " else:\n",
1256
- " record_list.append(f\"Letters to guess again: {sorted(list(next_letters), reverse = False)}\\n\")\n",
1257
- " # if\n",
1258
- " # record_list.append(f\"Letters to not guess again: {sorted(list(dont_guess_again), reverse = False)}\\n\") # works\n",
1259
- "\n",
1260
- " # Returns True\n",
1261
- " # print(A.issubset(B)) # \"if everything in A is in B\", returns Bool\n",
1262
- "\n",
1263
- " perfect_letts_per_guess.append(len(perfect_letters))\n",
1264
- " wrong_pos_per_guess.append(len(incorrect_positions))\n",
1265
- " wrong_letts_per_guess.append(len(dont_guess_again))\n",
1266
- "\n",
1267
- " potential_next_guesses = set()\n",
1268
- " middle_set = set()\n",
1269
- "\n",
1270
- " if len(perfect_letters) == 0 and len(incorrect_positions) == 0: # if there are NEITHER perfect letters, NOR incorrect positions, ....\n",
1271
- " for word in word_list:\n",
1272
- " if word not in dont_guess_words:\n",
1273
- " if word not in guessed_words:\n",
1274
- " potential_next_guesses.add(word)\n",
1275
- " \n",
1276
- " # print(f\"GUESS {guess_num} : TEST 1-1\")\n",
1277
- "\n",
1278
- " if len(perfect_letters) == 0 and len(incorrect_positions) != 0: # if there are no perfect letters whatsoever, but there ARE incorrect positions ....\n",
1279
- " for word in word_list:\n",
1280
- " for incor_letter, incor_pos in incorrect_positions:\n",
1281
- " if incor_pos < len(word):\n",
1282
- " if word[incor_pos] != incor_letter:\n",
1283
- " if word not in dont_guess_words: # just in case\n",
1284
- " word_set = set()\n",
1285
- " for letter in word:\n",
1286
- " word_set.add(letter)\n",
1287
- " \n",
1288
- " if next_letters.issubset(word_set):\n",
1289
- " if word not in guessed_words:\n",
1290
- " if len(dont_guess_again) > 0:\n",
1291
- " for bad_letter in dont_guess_again:\n",
1292
- " if bad_letter not in word:\n",
1293
- " # potential_next_guesses.append(word)\n",
1294
- " potential_next_guesses.add(word)\n",
1295
- " else:\n",
1296
- " potential_next_guesses.add(word)\n",
1297
- " \n",
1298
- " # print(f\"GUESS {guess_num} : TEST 2-1\")\n",
1299
- "\n",
1300
- " else:\n",
1301
- " for word in word_list:\n",
1302
- " if word not in dont_guess_words: # just in case\n",
1303
- " word_set = set()\n",
1304
- " for letter in word:\n",
1305
- " word_set.add(letter)\n",
1306
- " if next_letters.issubset(word_set):\n",
1307
- " if word not in guessed_words:\n",
1308
- " # print (\"TEST 3-2\")\n",
1309
- "\n",
1310
- " if len(dont_guess_again) > 0:\n",
1311
- " for bad_letter in dont_guess_again:\n",
1312
- " if bad_letter not in word:\n",
1313
- " middle_set.add(word)\n",
1314
- " else:\n",
1315
- " middle_set.add(word)\n",
1316
- " for word in middle_set:\n",
1317
- " dummy_list = []\n",
1318
- " for good_lett, good_pos in perfect_letters:\n",
1319
- " if word[good_pos] == good_lett:\n",
1320
- " dummy_list.append(1)\n",
1321
- " if len(dummy_list) == len(perfect_letters):\n",
1322
- " potential_next_guesses.add(word)\n",
1323
- " for word in middle_set:\n",
1324
- " dummy_list = []\n",
1325
- " for bad_lett, bad_pos in incorrect_positions:\n",
1326
- " if bad_pos < len(word):\n",
1327
- " if word[bad_pos] == bad_lett:\n",
1328
- " dummy_list.append(1)\n",
1329
- " if len(dummy_list) > 0:\n",
1330
- " potential_next_guesses.remove(word)\n",
1331
- " \n",
1332
- " # print(f\"GUESS {guess_num} : TEST 3-1\")\n",
1333
- "\n",
1334
- " if return_stats == False:\n",
1335
- " if verbose == True:\n",
1336
- " print(f\"At this point:\")\n",
1337
- " print(f\"\\t{len(word_list) - len(potential_next_guesses)}, {round((len(word_list) - len(potential_next_guesses)) / len(word_list) * 100, 2)}% of total words have been eliminated, and\")\n",
1338
- " print(f\"\\t{len(potential_next_guesses)}, {round(len(potential_next_guesses) / len(word_list) * 100, 2)}% of total words remain possible.\\n\")\n",
1339
- " # record_list.append(f\"At this point:\")\n",
1340
- " record_list.append(f\"{len(word_list) - len(potential_next_guesses)} ({round((len(word_list) - len(potential_next_guesses)) / len(word_list) * 100, 2)}% of all) words have been eliminated, and {len(potential_next_guesses)} ({round(len(potential_next_guesses) / len(word_list) * 100, 2)}% of all) words are still possible.\\n\")\n",
1341
- " \n",
1342
- " reduction_per_guess.append(len(potential_next_guesses))\n",
1343
- " \n",
1344
- " #### Guessing next word\n",
1345
- " if len(potential_next_guesses) == 1:\n",
1346
- "\n",
1347
- " if return_stats == False:\n",
1348
- " if verbose == True:\n",
1349
- " print(f\"The only remaining possible word is:\\n\\t'{list(potential_next_guesses)[0]}'\\n\")\n",
1350
- " record_list.append(f\"The only remaining possible word is: '{list(potential_next_guesses)[0]}'\\n\")\n",
1351
- " \n",
1352
- " guess = list(potential_next_guesses)[0]\n",
1353
- " guess_entropies.append(get_word_entropy([guess], word_list, normalized = True, ascending = False)[0][1])\n",
1354
- "\n",
1355
- " else:\n",
1356
- "\n",
1357
- " best_next_guesses = list(potential_next_guesses) \n",
1358
- " # print (best_next_guesses)\n",
1359
- " word_ratings = get_word_entropy(best_next_guesses, word_list, normalized = True, ascending = False) # \"internal\" ratings\n",
1360
- " \n",
1361
- " # Get max rating of all words\n",
1362
- " max_rating = -np.inf\n",
1363
- " for word, rating in word_ratings:\n",
1364
- " if rating > max_rating:\n",
1365
- " max_rating = rating\n",
1366
- "\n",
1367
- " # add best rated words (all equally best entropy in next guess list) to set\n",
1368
- " best_of_the_best_1 = []\n",
1369
- " for word, rating in word_ratings:\n",
1370
- " if rating == max_rating:\n",
1371
- " best_of_the_best_1.append(word)\n",
1372
- "\n",
1373
- " # only using the top ten most frequent prefixes and suffixes for the bias; beyond that the impact is negligible\n",
1374
- " test_starts = get_gram_freq(word_list = word_list, letters_length = 2, position = \"start\", search = None)[:10]\n",
1375
- " test_ends = get_gram_freq(word_list = word_list, letters_length = 2, position = \"end\", search = None)[:10]\n",
1376
- "\n",
1377
- " # list of the best words that also have the best suffixes and prefixes\n",
1378
- " best_of_the_best_2 = []\n",
1379
- " for start_gram, start_count in test_starts:\n",
1380
- " for end_gram, end_count in test_ends:\n",
1381
- " for word in best_of_the_best_1:\n",
1382
- " if word[:2] == start_gram and word[-2:] == end_gram:\n",
1383
- " best_of_the_best_2.append(word)\n",
1384
- "\n",
1385
- " if len(best_of_the_best_2) > 0:\n",
1386
- " guess = best_of_the_best_2[0] # prefer the top-rated word that also has a frequent prefix/suffix\n",
1387
- " else:\n",
1388
- " guess = best_of_the_best_1[0] # they're all equally the best of the best possible guesses so just pick the first\n",
1389
- " \n",
1390
- " # guess_entropies.append(get_word_entropy([guess], word_list, normalized = True, ascending = False)[0][1])\n",
1391
- "\n",
1392
- " if return_stats == False:\n",
1393
- " if verbose == True:\n",
1394
- " if len(word_ratings) <= 40:\n",
1395
- " print(f\"All potential next guesses:\\n\\t{word_ratings}\\n\")\n",
1396
- " print(f\"Words guessed so far:\\n\\t{guessed_words}.\\n\")\n",
1397
- " record_list.append(f\"Potential next guesses: {word_ratings}\\n\")\n",
1398
- " record_list.append(f\"Words guessed so far: {guessed_words}.\\n\")\n",
1399
- " else:\n",
1400
- " print(f\"The top 40 potential next guesses are:\\n\\t{word_ratings[:40]}\\n\")\n",
1401
- " print(f\"Words guessed so far:\\n\\t{guessed_words}.\\n\")\n",
1402
- " record_list.append(f\"The top 40 potential next guesses are: {word_ratings[:40]}\\n\")\n",
1403
- " record_list.append(f\"Words guessed so far: {guessed_words}.\\n\")\n",
1404
- "\n",
1405
- " guess_entropies.append(get_word_entropy([guess], word_list, normalized = True, ascending = False)[0][1])\n",
1406
- "\n",
1407
- " #### Guess has now been made -- what to do next\n",
1408
- " if guess_num == max_guesses: # if at max guesses allowed\n",
1409
- " guessed_words.append(guess)\n",
1410
- " stats_dict['target_guessed'] = False\n",
1411
- " if return_stats == False:\n",
1412
- " if verbose == True:\n",
1413
- " # print(\"-----------------------------\\n\")\n",
1414
- " print(f\"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.\\n\")\n",
1415
- " print(f\"The target word was '{target}'. Better luck next time!\\n\")\n",
1416
- " print(\"-----------------------------\\n\")\n",
1417
- " record_list.append(f\"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.\\n\")\n",
1418
- " record_list.append(f\"The target word was '{target}'. Better luck next time!\\n\")\n",
1419
- " record_list.append(\"-----------------------------\\n\")\n",
1420
- " else:\n",
1421
- " print(f\"\\nUnfortunately, the Wordle could not be solved in {max_guesses} guesses.\")\n",
1422
- " print(f\"The target word was '{target}'. Better luck next time!\\n\")\n",
1423
- " record_list.append(f\"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.\")\n",
1424
- " record_list.append(f\"The target word was '{target}'. Better luck next time!\\n\")\n",
1425
- " break\n",
1426
- " else: # if not at max guesses yet allowed\n",
1427
- " # stats_dict['target_guessed'] = False\n",
1428
- " if return_stats == False:\n",
1429
- " if verbose == True:\n",
1430
- " print(f\"Next guess:\\n\\t'{guess}'\")\n",
1431
- " print(\"\\n-----------------------------\\n\")\n",
1432
- " record_list.append(f\"Next guess: '{guess}'\")\n",
1433
- " record_list.append(\"-----------------------------\\n\")\n",
1434
- "\n",
1435
- " if guess == target:\n",
1436
- " guess_num += 1\n",
1437
- " guessed_words.append(guess)\n",
1438
- " stats_dict['target_guessed'] = True\n",
1439
- "\n",
1440
- " if return_stats == False:\n",
1441
- " print(f\"Guess {guess_num}: '{guess}'\\n\")\n",
1442
- " print(f\"Congratulations! The Wordle has been solved in {guess_num} guesses!\")\n",
1443
- " record_list.append(f\"Guess {guess_num}: '{guess}'\\n\")\n",
1444
- " record_list.append(f\"Congratulations! The Wordle has been solved in {guess_num} guesses!\")\n",
1445
- "\n",
1446
- " if max_guesses - guess_num == 0:\n",
1447
- " print(f\"Lucky! It was the last guess.\")\n",
1448
- " record_list.append(f\"Lucky! It was the last guess.\")\n",
1449
- " else:\n",
1450
- " print(f\"There were still {max_guesses - guess_num} guesses remaining.\")\n",
1451
- " record_list.append(f\"There were still {max_guesses - guess_num} guesses remaining.\")\n",
1452
- "\n",
1453
- " if return_stats == False: \n",
1454
- " # stats_dict['target_guessed'] = True \n",
1455
- " print(f\"\\nThe target word was '{target}'.\")\n",
1456
- " print(\"\\n-----------------------------\")\n",
1457
- " record_list.append(f\"The target word was '{target}'.\")\n",
1458
- " record_list.append(\"-----------------------------\")\n",
1459
- " break\n",
1460
- "\n",
1461
- " #### STATS STUFF \n",
1462
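- "    # summarize the run: vowel/consonant usage, per-guess letter accuracy, word-list reduction, entropy, and luck\n",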
- " mid_guesses_vows = 0\n",
1463
- " mid_guesses_cons = 0\n",
1464
- " avg_perf_letters = 0\n",
1465
- " avg_wrong_pos_letters = 0\n",
1466
- " avg_wrong_letters = 0\n",
1467
- "\n",
1468
- " for i, word in enumerate(guessed_words):\n",
1469
- " mid_guesses_vows += count_vows_cons(word, y_vow = True)['vows']\n",
1470
- " mid_guesses_cons += count_vows_cons(word, y_vow = True)['cons']\n",
1471
- " \n",
1472
- " for i in range(0, len(guessed_words) - 1):\n",
1473
- " avg_perf_letters += perfect_letts_per_guess[i]\n",
1474
- " avg_wrong_pos_letters += wrong_pos_per_guess[i]\n",
1475
- " avg_wrong_letters += wrong_letts_per_guess[i]\n",
1476
- "\n",
1477
- " stats_dict['mid_guesses_avg_vows'] = float(round(mid_guesses_vows / len(guessed_words), 2))\n",
1478
- " stats_dict['mid_guesses_avg_cons'] = float(round(mid_guesses_cons / len(guessed_words), 2))\n",
1479
- "\n",
1480
- " stats_dict['avg_perf_letters'] = float(round(np.mean(avg_perf_letters), 2))\n",
1481
- " stats_dict['avg_wrong_pos_letters'] = float(round(np.mean(avg_wrong_pos_letters), 2))\n",
1482
- " stats_dict['avg_wrong_letters'] = float(round(np.mean(avg_wrong_letters), 2))\n",
1483
- " \n",
1484
- " # average number of words remaining after each guess -- the higher this is, the luckier the person got (the lower, the more guesses it took)\n",
1485
- " stats_dict['avg_remaining'] = float(round(np.mean(reduction_per_guess), 2))\n",
1486
- "\n",
1487
- " # avg entropy of each guessed word relative to all other words possible at that moment -- this should consistently be 100 for the algorithm, but will be different for user\n",
1488
- " if len(guess_entropies) > 1: # in case of guessing it correctly on the first try\n",
1489
- " sum_entropies = 0\n",
1490
- " for entropy in guess_entropies:\n",
1491
- " sum_entropies += entropy\n",
1492
- "\n",
1493
- " average_entropy = float(round(sum_entropies / len(guess_entropies), 2))\n",
1494
- " stats_dict['avg_intermediate_guess_entropy'] = average_entropy\n",
1495
- " else:\n",
1496
- " stats_dict['avg_intermediate_guess_entropy'] = float(100)\n",
1497
- "\n",
1498
- " expected_guesses = 3.85\n",
1499
- "\n",
1500
- " # guess_num = 3\n",
1501
- " # average_entropy = 95\n",
1502
- " luck = round(1 - ((((guess_num / expected_guesses) * (stats_dict['avg_intermediate_guess_entropy'] / 100)) / max_guesses) * 5), 2)\n",
1503
- " stats_dict['luck'] = luck\n",
1504
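- "    # illustrative check (hypothetical values): guess_num = 4, avg_intermediate_guess_entropy = 100, max_guesses = 6\n",
- "    #   -> luck = 1 - (((4 / 3.85) * 1.0) / 6) * 5 = 0.13\n",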
- "\n",
1505
- " if record == True:\n",
1506
- " if verbose == True:\n",
1507
- " with open(f\"solutions/{guessed_words[0]}_{target}_wizard_detailed.txt\", \"w\") as fout:\n",
1508
- " for line in record_list:\n",
1509
- " fout.write(line + \"\\n\") # write each line of list of printed text to .txt file\n",
1510
- " else:\n",
1511
- " with open(f\"solutions/{guessed_words[0]}_{target}_wizard_summary.txt\", \"w\") as fout:\n",
1512
- " for line in record_list:\n",
1513
- " fout.write(line + \"\\n\") # write\n",
1514
- "\n",
1515
- " # if guess_num <= len(guess):\n",
1516
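- "    # a success presumably only counts as valid if it fits within the standard six-guess Wordle limit, regardless of max_guesses\n",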
- " if guess_num <= 6:\n",
1517
- " stats_dict['valid_success'] = True\n",
1518
- " else:\n",
1519
- " stats_dict['valid_success'] = False\n",
1520
- "\n",
1521
- " stats_dict['num_guesses'] = float(guess_num)\n",
1522
- "\n",
1523
- " # if return_stats == True:\n",
1524
- " # return stats_dict\n",
1525
- " if hf_mod == True:\n",
1526
- " return record_list"
1527
- ]
1528
- },
1529
- {
1530
- "cell_type": "code",
1531
- "execution_count": null,
1532
- "metadata": {},
1533
- "outputs": [],
1534
- "source": [
1535
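- "# example run; the mixed-case target presumably exercises the solver's case normalization\n",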
- "test_1 = wordle_wizard(word_list = official_words, max_guesses = 6, \n",
1536
- " guess = \"quota\", target = \"fatAl\",\n",
1537
- " random_guess = False, random_target = False, \n",
1538
- " verbose = True, drama = 0, return_stats = False, record = False)"
1539
- ]
1540
- },
1541
- {
1542
- "cell_type": "code",
1543
- "execution_count": null,
1544
- "metadata": {},
1545
- "outputs": [],
1546
- "source": [
1547
- "suffix_freq_dist = {}\n",
1548
- "prefix_freq_dist = {}\n",
1549
- "\n",
1550
- "for word in official_words:\n",
1551
- " prefix = word[:2] # first 2 letters\n",
1552
- " suffix = word[-2:] # last 2 letters\n",
1553
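- "    # e.g., \"crane\" -> prefix \"cr\", suffix \"ne\"\n",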
- " if prefix not in prefix_freq_dist:\n",
1554
- " prefix_freq_dist[prefix] = 1\n",
1555
- " else:\n",
1556
- " prefix_freq_dist[prefix] += 1\n",
1557
- "\n",
1558
- " if suffix not in suffix_freq_dist:\n",
1559
- " suffix_freq_dist[suffix] = 1\n",
1560
- " else:\n",
1561
- " suffix_freq_dist[suffix] += 1\n",
1562
- "\n",
1563
- "suffix_types = [key for key in suffix_freq_dist.keys()]\n",
1564
- "prefix_types = [key for key in prefix_freq_dist.keys()]\n",
1565
- "\n",
1566
- "sorted_prefix_dist = sorted(prefix_freq_dist.items(), key = operator.itemgetter(1), reverse = True)\n",
1567
- "sorted_suffix_dist = sorted(suffix_freq_dist.items(), key = operator.itemgetter(1), reverse = True)\n",
1568
- "\n",
1569
- "print(\"Prefixes:\")\n",
1570
- "print(len(sorted_prefix_dist))\n",
1571
- "print(sorted_prefix_dist[:10])\n",
1572
- "print(\"-----\")\n",
1573
- "print(\"Suffixes:\")\n",
1574
- "print(len(sorted_suffix_dist))\n",
1575
- "print(sorted_suffix_dist[:10])\n",
1576
- "\n",
1577
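- "# spot-check the counts for a few specific prefixes\n",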
- "for tup in sorted_prefix_dist:\n",
1578
- " if tup[0] in [\"ho\", 'jo', 'go']:\n",
1579
- " print (tup)"
1580
- ]
1581
- },
1582
- {
1583
- "cell_type": "code",
1584
- "execution_count": null,
1585
- "metadata": {},
1586
- "outputs": [],
1587
- "source": [
1588
- "grams_freq_dist = {}\n",
1589
- "gram_len = 3\n",
1590
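- "# e.g., \"crane\" with gram_len = 3 yields the grams \"cra\", \"ran\", \"ane\"\n",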
- "\n",
1591
- "for word in official_words:\n",
1592
- " for i in range(0, len(word) - (gram_len - 1)): # so it doesn't index out of range\n",
1593
- " gram = word[i:i + gram_len]\n",
1594
- "\n",
1595
- " if gram not in grams_freq_dist:\n",
1596
- " grams_freq_dist[gram] = 1\n",
1597
- " else:\n",
1598
- " grams_freq_dist[gram] += 1\n",
1599
- "\n",
1600
- "print(len(grams_freq_dist))\n",
1601
- "sorted_gram_dist = sorted(grams_freq_dist.items(), key = operator.itemgetter(1), reverse = True)\n",
1602
- "sorted_gram_dist[:15]"
1603
- ]
1604
- },
1605
- {
1606
- "cell_type": "code",
1607
- "execution_count": null,
1608
- "metadata": {},
1609
- "outputs": [],
1610
- "source": []
1611
- },
1612
- {
1613
- "cell_type": "code",
1614
- "execution_count": null,
1615
- "metadata": {},
1616
- "outputs": [],
1617
- "source": []
1618
- },
1619
- {
1620
- "cell_type": "code",
1621
- "execution_count": null,
1622
- "metadata": {},
1623
- "outputs": [],
1624
- "source": []
1625
- }
1626
- ],
1627
- "metadata": {
1628
- "kernelspec": {
1629
- "display_name": "base",
1630
- "language": "python",
1631
- "name": "python3"
1632
- },
1633
- "language_info": {
1634
- "codemirror_mode": {
1635
- "name": "ipython",
1636
- "version": 3
1637
- },
1638
- "file_extension": ".py",
1639
- "mimetype": "text/x-python",
1640
- "name": "python",
1641
- "nbconvert_exporter": "python",
1642
- "pygments_lexer": "ipython3",
1643
- "version": "3.10.4"
1644
- },
1645
- "orig_nbformat": 4,
1646
- "vscode": {
1647
- "interpreter": {
1648
- "hash": "3d597f4c481aa0f25dceb95d2a0067e73c0966dcbd003d741d821a7208527ecf"
1649
- }
1650
- }
1651
- },
1652
- "nbformat": 4,
1653
- "nbformat_minor": 2
1654
- }