Spaces:
Sleeping
Sleeping
import numpy as np # for stats | |
import random # for randomly generating target and start words | |
import operator # for sorting letter frequency distribution | |
import time # for #dramaticeffect | |
import pandas as pd | |
# from nltk.corpus import movie_reviews, treebank, brown, gutenberg, switchboard | |
# Lowercase English alphabet; the letter set whose frequencies are counted by default.
english_alphabet = "abcdefghijklmnopqrstuvwxyz"
def get_letter_counts(letters: str, word_list: list, sort: str = "descending"):
    """
    Given a passed str of letters and a list of words, produces a frequency distribution of those letters

    Parameters:
    ------
    `letters`: str
        a string of letters to be counted. String must only be desired letters, with no spaces.
        Characters found in words but absent from `letters` are ignored
    `word_list`: list
        list of words (str) from which letter frequencies will be counted.
        A word that appears more than once in the list is only counted once
    `sort`: str
        if either "descending" or "ascending" are passed, returned list of tuples will be sorted accordingly,
        else returned dictionary will be unsorted

    Returns:
    ------
    `letters_counts_dict`: dict
        dictionary of {letter : count} pairs for each letter in passed `letters` sequence
        (returned when `sort` is neither "ascending" nor "descending")
    `sorted_counts_dict`: list of tuples
        list of tuples. Format is ("letter", frequency). Ordered according to `sort` values.
        Letters with equal counts keep the order they have in `letters`
    """
    letters_counts_dict = {letter: 0 for letter in letters}

    # dict.fromkeys() drops repeated words while keeping first-seen order, so each
    # distinct word contributes to the distribution exactly once.
    for word in dict.fromkeys(word_list):
        for letter in word:
            if letter in letters_counts_dict:  # skip anything outside the requested letters
                letters_counts_dict[letter] += 1

    if sort == "ascending":
        return sorted(letters_counts_dict.items(), key=operator.itemgetter(1), reverse=False)
    if sort == "descending":
        return sorted(letters_counts_dict.items(), key=operator.itemgetter(1), reverse=True)
    return letters_counts_dict
### Best first guesses for a given Wordle list | |
def best_guess_words(word_list: list, show_letters: bool = False):
    """
    Given a passed list of English words of a consistent length, calculates the most statistically optimal first guess words, alongside a rating for each word.

    Rating = sum(frequency of each unique letter in that word) / sum (all unique letter frequencies in word_list) * 100, rounded to 2 decimals.

    ------
    Parameters:
    ------
    `word_list`: list
        list of words (str) of consistent length. An empty list yields no best words
    `show_letters`: bool
        if True, the letter frequency distribution is returned alongside the word ratings

    ------
    Returns:
    ------
    `word_ratings`: list
        list of tuples. Format is [(word, rating)], where rating is calculated according to above formula.
        Sorted from highest to lowest rating
    `sorted_counts`: list of tuples
        only returned (as the second item of a tuple) when `show_letters` is True.
        Format is ("letter", frequency), sorted by descending frequency
    """
    english_alphabet = "abcdefghijklmnopqrstuvwxyz"
    sorted_counts = get_letter_counts(english_alphabet, word_list, sort = "descending")
    letter_freqs = dict(sorted_counts)  # O(1) lookup instead of rescanning the list per letter

    ### Get words with the highest letter diversity
    # Start by demanding as many of the top letters as a word has positions, then relax
    # the requirement one letter at a time (top 4 letters, then 3, then 2, ...) until
    # at least one word qualifies.
    best_words = []
    max_len_possible = len(word_list[0]) if word_list else 0
    while max_len_possible > 0:
        best_letters = {letter for letter, _ in sorted_counts[:max_len_possible]}
        best_words = [word for word in word_list if best_letters.issubset(word)]
        if best_words:
            break
        max_len_possible -= 1

    all_letters_count = sum(letter_freqs.values())

    word_ratings = []
    for word in best_words:
        # each distinct letter counts once, however often it repeats in the word
        total_rating = sum(letter_freqs.get(letter, 0) for letter in set(word))
        word_ratings.append((word, round(total_rating / all_letters_count * 100, 2)))
    word_ratings = sorted(word_ratings, key = operator.itemgetter(1), reverse = True)

    if show_letters == True:
        return word_ratings, sorted_counts
    return word_ratings
def count_vows_cons(word: str, y_vow = True):
    """
    Given a passed word, calculate the number of non-unique vowels and consonants in the word (duplicates counted more than once).

    ------
    Parameters:
    ------
    `word`: str
        a single passed word (str). Case is ignored; characters that are not English letters are not counted
    `y_vow`: bool
        if truthy, "y" is considered a vowel. If falsy, "y" considered a consonant. Default is True

    ------
    Returns:
    ------
    `counts`: dict
        dictionary, where format is {letter type : count}, with the keys "vows" and "cons"
    """
    word = word.lower() # for consistency

    # Truthiness (rather than `== True` / `== False`) guarantees both letter sets are
    # always defined, even if a non-bool such as None is passed.
    if y_vow:
        vows = "aeiouy"
        cons = "bcdfghjklmnpqrstvwxz"
    else:
        vows = "aeiou"
        cons = "bcdfghjklmnpqrstvwxyz"

    counts = {"vows": 0, "cons": 0}
    for letter in word:
        if letter in vows:
            counts["vows"] += 1
        elif letter in cons:
            counts["cons"] += 1
    return counts
def get_word_entropy(words_to_rate: list, word_list: list, normalized: bool = True, ascending: bool = False):
    """
    Given a list of words to rate and a reference word list, rates each word as a measure of its impact to the next possible guesses in Wordle.

    Raw rating = sum(frequency of each unique letter in that word) / sum (all letter frequencies in word_list) * 100, rounded to 2 decimals.

    ------
    Parameters:
    ------
    `words_to_rate`: list
        list of strings to be rated. Words are lowercased before rating
    `word_list`: list
        list of all possible words (str) of consistent length, to which each word in `words_to_rate` will be compared
    `normalized`: bool
        if True, normalizes all ratings on a scale of 0-100, with 100 being the rating for the most optimal word, and 0 for the least optimal word.
        A single rated word is always given 100; if every word has the same raw rating, all are given 0
    `ascending`: bool
        if True, returns list ordered ascending. If False, returns list in descending order

    ------
    Returns:
    ------
    `word_ratings`: list
        list of tuples. Format is [(word, rating)], ordered according to `ascending`.
        Empty if `words_to_rate` is empty
    """
    letter_freqs = dict(get_letter_counts(english_alphabet, word_list, sort = "descending"))
    all_letters_count = sum(letter_freqs.values())

    unnormalized_ratings = []
    for word in words_to_rate:
        word = word.lower()
        # each distinct letter counts once, however often it repeats in the word
        total_rating = sum(letter_freqs.get(letter, 0) for letter in set(word))
        # guard against a reference list that contributes no letters at all
        share = total_rating / all_letters_count * 100 if all_letters_count else 0.0
        unnormalized_ratings.append((word, round(share, 2)))

    if not unnormalized_ratings:
        return []

    # sorted() is stable, so equally rated words keep the order they were passed in
    word_ratings = sorted(unnormalized_ratings, key = operator.itemgetter(1), reverse = not ascending)

    if not normalized:
        return word_ratings

    if len(word_ratings) == 1:
        return [(word_ratings[0][0], float(100))]

    ratings = [rating for _, rating in word_ratings]
    lowest = min(ratings)
    spread = max(ratings) - lowest
    if spread == 0:
        # every word is equally good; nothing to scale against
        return [(word, 0.0) for word, _ in word_ratings]
    return [(word, round((rating - lowest) / spread * 100, 2)) for word, rating in word_ratings]
### Gets most common words of all words of the dataset | |
def get_word_distribution(word_list: list, sort: str = "descending"):
    """
    Given a passed list of words, produces a frequency distribution of all words

    ------
    Parameters:
    ------
    `word_list`: list
        list of words (str) from which word frequencies will be counted
    `sort`: str
        if either "descending" or "ascending" are passed, returned list of tuples will be sorted accordingly,
        else returned dictionary will be unsorted

    ------
    Returns:
    ------
    `words_counts_dict`: dict
        dictionary of {word : count} pairs for each word in passed `word_list`
        (returned when `sort` is neither "ascending" nor "descending")
    `sorted_counts_dict`: list of tuples
        list of tuples. Format is ("word", frequency). Ordered according to `sort` values.
        Words with equal counts keep the order of their first appearance in `word_list`
    """
    words_counts_dict = {}
    for word in word_list:
        words_counts_dict[word] = words_counts_dict.get(word, 0) + 1

    if sort == "ascending":
        return sorted(words_counts_dict.items(), key = operator.itemgetter(1), reverse = False)
    if sort == "descending":
        return sorted(words_counts_dict.items(), key = operator.itemgetter(1), reverse = True)
    # any other value: hand back the raw counts, as documented
    return words_counts_dict
############################################################################################################################################################ | |
############################################################################################################################################################ | |
############################################################################################################################################################ | |
############################################################################################################################################################ | |
def wordle_wizard(word_list: list, max_guesses: int = None,
                  guess: str = None, target: str = None,
                  random_guess: bool = False, random_target: bool = False,
                  verbose: bool = False, drama: float = None,
                  return_stats: bool = False, record: bool = False, hf_mod: bool = True):
    """
    Mimicking the popular web game, this function matches a current word to a target word automatically, in the most statistically optimal way possible.

    Parameters:
    ------
    `word_list`: list
        list of valid words to be considered
    `guess`: str
        a string -- must be the same length as `target`. May be omitted when `random_guess` is True
    `target`: str
        a string -- must be the same length as `guess`. May be omitted when `random_target` is True
    `max_guesses`: int
        the maximum number of attempts allowed to solve the Wordle. Defaults to the length of the first guess
    `random_guess`: bool
        if True, randomly chooses a starting word from all words within `word_list`. If False, passed starting word must be used instead
    `random_target`: bool
        if True, randomly chooses a target word from all words within `word_list`. If False, passed target word must be used instead
    `verbose`: bool
        if True, prints progress and explanation of how function solves the puzzle. If False, prints only the guessed word at each guess.
    `drama`: float or int
        if provided, each guess' output is delayed by that number of seconds, else each output is shown as quickly as possible. For ~dRaMaTiC eFfEcT~
    `return_stats`: bool
        if True, prints nothing. The statistics dictionary is only returned when `hf_mod` is False
    `record`: bool
        if True, creates a .txt file (inside a `solutions` directory) with the same information printed according to the indicated verbosity
    `hf_mod`: bool
        if True (default), the list of recorded output lines is returned instead of the statistics dictionary

    Returns:
    ------
    `record_list`: list
        returned when `hf_mod` is True. The lines of text describing the run
    `stats_dict`: dict
        returned when `hf_mod` is False and `return_stats` is True.
        Dictionary containing various statistics about the function's performance trying to solve the puzzle
    `None`
        returned when a passed guess/target is not in `word_list`, or when neither of the above applies
    """
    # Only normalise words that were actually passed: a word may be omitted when the
    # matching `random_*` flag asks for a random pick.
    if guess is not None:
        guess = guess.lower()
    if target is not None:
        target = target.lower()

    # a handful of random valid words, shown as examples if validation fails
    sugg_words = []
    for _ in range(0, 20):
        sugg_words.append(word_list[random.randint(0, len(word_list) - 1)])

    if (guess is not None or not random_guess) and guess not in word_list:
        print ("Guess word not in passed word list.\nOnly words within the given word list are valid.")
        print (f"Here are some examples of valid words from the passed word list.\n\t{sugg_words[:10]}")
        return None

    if (target is not None or not random_target) and target not in word_list:
        print ("Target word not in passed word list.\nOnly words within the given word list are valid.")
        print (f"Here are some examples of valid words from the passed word list.\n\t{sugg_words[-10:]}")
        return None

    if random_guess == True:
        guess = word_list[random.randint(0, len(word_list) - 1)]
    if random_target == True:
        target = word_list[random.randint(0, len(word_list) - 1)]

    emit = not return_stats  # nothing is printed or recorded when only stats are wanted

    first_guess_counts = count_vows_cons(guess, y_vow = True)
    target_counts = count_vows_cons(target, y_vow = True)
    stats_dict = {}
    stats_dict['first_guess'] = guess
    stats_dict['target_word'] = target
    stats_dict['first_guess_vowels'] = float(first_guess_counts['vows'])
    stats_dict['first_guess_consonants'] = float(first_guess_counts['cons'])
    stats_dict['target_vowels'] = float(target_counts['vows'])
    stats_dict['target_consonants'] = float(target_counts['cons'])

    # get entropy of the first guess word and target word in the entire word_list
    for rated_word, rating in get_word_entropy(word_list, word_list, normalized = True):
        if rated_word == guess:
            stats_dict['first_guess_entropy'] = rating
        if rated_word == target:
            stats_dict['target_entropy'] = rating

    guess_entropies = [stats_dict['first_guess_entropy']]

    wordlen = len(guess)
    if max_guesses == None: # if no value is passed, default is len(guess)
        max_guesses = wordlen

    # only using top ten most frequent starting and ending letters to bias ties. After that the impact is especially negligible.
    # They depend on `word_list` alone, so they are computed once rather than on every guess.
    test_starts = get_gram_freq(word_list = word_list, letters_length = 1, position = "start", search = None)[:10]
    test_ends = get_gram_freq(word_list = word_list, letters_length = 1, position = "end", search = None)[:10]

    perfect_dict = {}            # letter -> positions where it is known to be correct
    wrong_pos_dict = {}          # letter -> positions where it is known NOT to belong
    dont_guess_again = set()     # letters known to be absent from the target
    dont_guess_words = set()     # words ruled out by the two sets above
    incorrect_positions = []     # running list of (letter, position) pairs known to be wrong
    guessed_words = []           # running list of guessed words
    guess_num = 0
    reduction_per_guess = []
    perfect_letts_per_guess = []
    wrong_pos_per_guess = []
    wrong_letts_per_guess = []
    record_list = []

    while guess: # while there is any guess -- there are conditions to break it at the bottom

        guess_num += 1
        guessed_words.append(guess)

        if drama:
            time.sleep(drama)

        if emit:
            if guess_num == 1:
                print("-----------------------------\n")
                record_list.append("-----------------------------\n")
            print(f"Guess {guess_num}: '{guess}'")
            record_list.append(f"Guess {guess_num}: '{guess}'")

        # Only reachable on the very first guess: later hits are caught at the bottom of the loop.
        if guess == target:
            stats_dict['target_guessed'] = True
            if emit:
                if guess_num == 1:
                    print(f"Congratulations! The Wordle has been solved in {guess_num} guess, that's amazingly lucky!")
                    print(f"The target word was {target}")
                    record_list.append(f"Congratulations! The Wordle has been solved in {guess_num} guess, that's amazingly lucky!")
                    record_list.append(f"The target word was '{target}'.")
            perfect_letts_per_guess.append(wordlen) # every letter is in place
            wrong_pos_per_guess.append(0)
            wrong_letts_per_guess.append(0)
            break

        ### EVALUATE CURRENT GUESS
        for i, letter in enumerate(guess):
            perfect_dict.setdefault(letter, set())
            wrong_pos_dict.setdefault(letter, set())
            if letter == target[i]: # letter == correct and position == correct
                perfect_dict[letter].add(i)
            elif letter in target: # letter == correct and position != correct
                wrong_pos_dict[letter].add(i)
            else: # letter is not relevant at all
                dont_guess_again.add(letter)

        #### Letters that must appear in any future guess
        next_letters = {letter for letter, positions in perfect_dict.items() if positions}
        next_letters |= {letter for letter, positions in wrong_pos_dict.items() if positions}

        #### List of tuples of correct letter positions in new valid words. Eg: [('e', 2), ('a', 3)]
        perfect_letters = [(letter, pos) for letter, positions in perfect_dict.items() for pos in positions]

        #### letters in known incorrect positions, accumulated over all guesses
        for letter, positions in wrong_pos_dict.items():
            for pos in positions:
                if (letter, pos) not in incorrect_positions:
                    incorrect_positions.append((letter, pos))

        # sorting lists of tuples just to make them look nice in the printout
        incorrect_positions = sorted(incorrect_positions, key = operator.itemgetter(1), reverse = False)
        perfect_letters = sorted(perfect_letters, key = operator.itemgetter(1), reverse = False)

        #### rule out words with a letter in a known incorrect spot, or containing an absent letter
        for word in word_list:
            in_bad_spot = any(pos < len(word) and word[pos] == letter for letter, pos in incorrect_positions)
            has_bad_letter = any(bad_letter in word for bad_letter in dont_guess_again)
            if in_bad_spot or has_bad_letter:
                dont_guess_words.add(word)

        if emit:
            if verbose == True:
                print(f"Letters in correct positions:\n\t{perfect_letters}\n")
                print(f"Letters in incorrect positions:\n\t{incorrect_positions}\n")
                print (f"Letters to guess again:\n\t{sorted(list(next_letters), reverse = False)}\n")
                print(f"Letters to not guess again:\n\t{sorted(list(dont_guess_again), reverse = False)}\n")

                if len(perfect_letters) == 0:
                    record_list.append(f"Letters in correct positions: None\n")
                else:
                    record_list.append(f"Letters in correct positions: {perfect_letters}\n")

                if len(incorrect_positions) == 0:
                    record_list.append(f"Letters in incorrect positions: None\n")
                else:
                    record_list.append(f"Letters in incorrect positions: {incorrect_positions}\n")

                if len(next_letters) == 0:
                    record_list.append(f"Letters to guess again: None\n")
                else:
                    record_list.append(f"Letters to guess again: {sorted(list(next_letters), reverse = False)}\n")

        perfect_letts_per_guess.append(len(perfect_letters))
        wrong_pos_per_guess.append(len(incorrect_positions))
        wrong_letts_per_guess.append(len(dont_guess_again))

        #### Work out which words are still possible
        potential_next_guesses = set()
        middle_set = set()

        if len(perfect_letters) == 0 and len(incorrect_positions) == 0: # if there are NEITHER perfect letters, NOR incorrect positions, ....
            for word in word_list:
                if word not in dont_guess_words and word not in guessed_words:
                    potential_next_guesses.add(word)

        # NOTE: the `else` below belongs to this second `if` only, so it also runs after the
        # branch above; in that case it simply re-derives the same candidates.
        if len(perfect_letters) == 0 and len(incorrect_positions) != 0: # if there are no perfect letters whatsoever, but there ARE incorrect positions ....
            for word in word_list:
                if word in dont_guess_words or word in guessed_words: # just in case
                    continue
                if not next_letters.issubset(word):
                    continue
                avoids_bad_spot = any(pos < len(word) and word[pos] != letter for letter, pos in incorrect_positions)
                avoids_bad_letter = (len(dont_guess_again) == 0
                                     or any(bad_letter not in word for bad_letter in dont_guess_again))
                if avoids_bad_spot and avoids_bad_letter:
                    potential_next_guesses.add(word)
        else:
            for word in word_list:
                if word in dont_guess_words or word in guessed_words: # just in case
                    continue
                if not next_letters.issubset(word):
                    continue
                if len(dont_guess_again) == 0 or any(bad_letter not in word for bad_letter in dont_guess_again):
                    middle_set.add(word)

            # keep words that have every known-correct letter in its known spot ...
            for word in middle_set:
                if all(pos < len(word) and word[pos] == letter for letter, pos in perfect_letters):
                    potential_next_guesses.add(word)

            # ... and drop any that repeat a letter in a known-incorrect spot
            for word in middle_set:
                if any(pos < len(word) and word[pos] == letter for letter, pos in incorrect_positions):
                    potential_next_guesses.discard(word) # discard: the word may never have qualified above

        n_remaining = len(potential_next_guesses)
        n_eliminated = len(word_list) - n_remaining
        if emit:
            if verbose == True:
                print(f"At this point:")
                print(f"\t{n_eliminated}, {round(n_eliminated / len(word_list) * 100, 2)}% of total words have been eliminated, and")
                print(f"\t{n_remaining}, {round(n_remaining / len(word_list) * 100, 2)}% of total words remain possible.\n")
                record_list.append(f"{n_eliminated} ({round(n_eliminated / len(word_list) * 100, 2)}% of all) words have been eliminated, and {n_remaining} ({round(n_remaining / len(word_list) * 100, 2)}% of all) words are still possible.\n")

        reduction_per_guess.append(n_remaining)

        #### Guessing next word
        if n_remaining == 1:
            if emit:
                if verbose == True:
                    record_list.append(f"Words guessed so far: {guessed_words}.\n")
                    record_list.append(f"The only remaining possible word is: '{list(potential_next_guesses)[0]}'\n")
            guess = list(potential_next_guesses)[0]
            guess_entropies.append(get_word_entropy([guess], word_list, normalized = True, ascending = False)[0][1])
        else:
            # sorted() makes tie-breaking reproducible; raw set order varies between runs
            best_next_guesses = sorted(potential_next_guesses)
            word_ratings = get_word_entropy(best_next_guesses, word_list, normalized = True, ascending = False) # "internal" ratings

            # all words sharing the best rating are equally good candidates
            max_rating = max(rating for _, rating in word_ratings)
            best_of_the_best_1 = [word for word, rating in word_ratings if rating == max_rating]

            # among those, prefer words with the most frequent starting and ending letters (suffixes and prefixes didn't have an impact)
            best_of_the_best_2 = []
            for start_gram, start_count in test_starts:
                for end_gram, end_count in test_ends:
                    for word in best_of_the_best_1:
                        if word[:1] == start_gram and word[-1:] == end_gram:
                            best_of_the_best_2.append(word)

            if len(best_of_the_best_2) > 0:
                guess = best_of_the_best_2[0]
            else:
                guess = best_of_the_best_1[0] # they're all equally the best of the best possible guesses so just pick the first

            if emit:
                if verbose == True:
                    if len(word_ratings) <= 40:
                        print(f"All potential next guesses:\n\t{word_ratings}\n")
                        print(f"Words guessed so far:\n\t{guessed_words}.\n")
                        record_list.append(f"Potential next guesses: {word_ratings}\n")
                        record_list.append(f"Words guessed so far: {guessed_words}.\n")
                    else:
                        print(f"The top 40 potential next guesses are:\n\t{word_ratings[:40]}\n")
                        print(f"Words guessed so far:\n\t{guessed_words}.\n")
                        record_list.append(f"The top 40 potential next guesses are: {word_ratings[:40]}\n")
                        record_list.append(f"Words guessed so far: {guessed_words}.\n")

            guess_entropies.append(get_word_entropy([guess], word_list, normalized = True, ascending = False)[0][1])

        #### Guess has now been made -- what to do next
        if guess_num == max_guesses: # if at max guesses allowed
            guessed_words.append(guess)
            stats_dict['target_guessed'] = False
            if emit:
                if verbose == True:
                    print(f"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.\n")
                    print(f"The target word was '{target}'. Better luck next time!\n")
                    print("-----------------------------\n")
                    record_list.append(f"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.\n")
                    record_list.append(f"The target word was '{target}'. Better luck next time!\n")
                    record_list.append("-----------------------------\n")
                else:
                    print(f"\nUnfortunately, the Wordle could not be solved in {max_guesses} guesses.")
                    print(f"The target word was '{target}'. Better luck next time!\n")
                    record_list.append(f"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.")
                    record_list.append(f"The target word was '{target}'. Better luck next time!\n")
            break
        else: # if not at max guesses yet allowed
            if emit:
                if verbose == True:
                    print(f"Next guess:\n\t'{guess}'")
                    print("\n-----------------------------\n")
                    record_list.append(f"Next guess: '{guess}'")
                    record_list.append("-----------------------------\n")

            if guess == target:
                guess_num += 1
                guessed_words.append(guess)
                stats_dict['target_guessed'] = True

                if emit:
                    print(f"Guess {guess_num}: '{guess}'\n")
                    print(f"Congratulations! The Wordle has been solved in {guess_num} guesses!")
                    record_list.append(f"Guess {guess_num}: '{guess}'\n")
                    record_list.append(f"Congratulations! The Wordle has been solved in {guess_num} guesses!")

                    if max_guesses - guess_num == 0:
                        print(f"Lucky! It was the last guess.")
                        record_list.append(f"Lucky! It was the last guess.")
                    else:
                        print(f"There were still {max_guesses - guess_num} guesses remaining.")
                        record_list.append(f"There were still {max_guesses - guess_num} guesses remaining.")

                    print(f"\nThe target word was '{target}'.")
                    print("\n-----------------------------")
                    record_list.append(f"The target word was '{target}'.")
                    record_list.append("-----------------------------")
                break

    #### STATS STUFF
    mid_guesses_vows = 0
    mid_guesses_cons = 0
    for word in guessed_words:
        word_counts = count_vows_cons(word, y_vow = True)
        mid_guesses_vows += word_counts['vows']
        mid_guesses_cons += word_counts['cons']

    # the last entry of `guessed_words` was never evaluated letter-by-letter, hence the -1
    evaluated = len(guessed_words) - 1
    total_perf_letters = sum(perfect_letts_per_guess[:evaluated])
    total_wrong_pos_letters = sum(wrong_pos_per_guess[:evaluated])
    total_wrong_letters = sum(wrong_letts_per_guess[:evaluated])

    stats_dict['mid_guesses_avg_vows'] = float(round(mid_guesses_vows / len(guessed_words), 2))
    stats_dict['mid_guesses_avg_cons'] = float(round(mid_guesses_cons / len(guessed_words), 2))

    # NOTE(review): despite the key names, these three values are totals over the evaluated
    # guesses, not averages. Kept unchanged so existing stats stay comparable -- confirm intent.
    stats_dict['avg_perf_letters'] = float(total_perf_letters)
    stats_dict['avg_wrong_pos_letters'] = float(total_wrong_pos_letters)
    stats_dict['avg_wrong_letters'] = float(total_wrong_letters)

    # average number of words remaining after each guess -- the higher this is, the luckier the person got (the lower, the more guesses it took)
    stats_dict['avg_remaining'] = float(round(np.mean(reduction_per_guess), 2))

    # avg entropy of each guessed word relative to all other words possible at that moment -- this should consistently be 100 for the algorithm, but will be different for user
    if len(guess_entropies) > 1: # in case of guessing it correctly on the first try
        stats_dict['avg_intermediate_guess_entropy'] = float(round(sum(guess_entropies) / len(guess_entropies), 2))
    else:
        stats_dict['avg_intermediate_guess_entropy'] = float(100)

    expected_guesses = 3.85
    luck = round(1 - ((((guess_num / expected_guesses) * (stats_dict['avg_intermediate_guess_entropy'] / 100)) / max_guesses) * 5), 2)
    stats_dict['luck'] = luck

    if record == True:
        import os # local import: only needed when a record file is requested
        os.makedirs("solutions", exist_ok = True) # the output directory may not exist yet
        if verbose == True:
            record_path = f"solutions/{guessed_words[0]}_{target}_wizard_detailed.txt"
        else:
            record_path = f"solutions/{guessed_words[0]}_{target}_wizard_summary.txt"
        with open(record_path, "w") as fout:
            for line in record_list:
                fout.write(line + "\n") # write each line of list of printed text to .txt file

    if guess_num <= 6:
        stats_dict['valid_success'] = True
    else:
        stats_dict['valid_success'] = False

    stats_dict['num_guesses'] = float(guess_num)

    if hf_mod == True:
        return record_list
    if return_stats == True:
        return stats_dict
    return None
############################################################################################################################################################ | |
############################################################################################################################################################ | |
############################################################################################################################################################ | |
############################################################################################################################################################ | |
def get_gram_freq(word_list: list, letters_length: int = 2, position: str = "start", search: str = None):
    """
    Given a word list, a selected number of letter, a selected word position to start from ("start" or "end"),
    and an optional gram to search within the list, this function will get a frequency distribution of all n-grams
    from the passed word list and returned a frequency distribution in descending order.

    Parameters:
    ------
    `word_list`: list
        list of words (str)
    `letters_length`: int
        number of letters in succession. Size/length of "gram". Must be between 1 and length of words in word list
    `position`: str
        "start" to take the gram from the start of the word (like a prefix), or "end" to take it from the end of the word (like a suffix)
    `search`: str
        If != None, string of characters to search for within the generated list. If string not found in list, function will print an error message.

    Returns:
    ------
    `tup`: tuple
        If search != None, will return a tuple with the passed search criteria, and its count.
        If the searched gram is not found, a message is printed and None is returned
    `sorted_gram_list`: list
        List of tuples in the form of (gram, count) for each combination of the gram size in the pass word_list.
        Grams with equal counts keep the order of their first appearance

    Raises:
    ------
    `ValueError`
        if `position` is neither "start" nor "end"
    """
    if position not in ("start", "end"):
        raise ValueError(f"position must be 'start' or 'end', not {position!r}")

    gram_freq_dist = {}
    for word in word_list:
        if position == "start":
            gram = word[:letters_length] # first `letters_length` letters
        else:
            # word[-0:] would be the whole word, so a zero-length gram is handled explicitly
            gram = word[-letters_length:] if letters_length else "" # last `letters_length` letters
        gram_freq_dist[gram] = gram_freq_dist.get(gram, 0) + 1

    sorted_gram_dist = sorted(gram_freq_dist.items(), key = operator.itemgetter(1), reverse = True)

    if search:
        for tup in sorted_gram_dist:
            if tup[0] == search:
                return tup
        print ("Search criteria not found in list. Please enter a gram from within the list.")
        return None

    return sorted_gram_dist
def compare_wordle(word_list: list, max_guesses: int = None, guess_list: list = None,
                    player: str = None, target: str = None,
                    verbose: bool = False,
                    return_stats: bool = False, record: bool = False):
    """
    Replays a recorded sequence of Wordle guesses (`guess_list`) against `target`, collecting statistics after each guess,
    then runs `wordle_wizard()` on the same puzzle (same first guess) and pairs the player's statistics with the wizard's.

    NOTE(review): this function mutates its arguments -- guesses missing from `word_list` are appended to it, and
    `guess_list` is consumed from the front as guesses are played.

    ------
    Parameters:
    ------
    `word_list`: list
        list of valid words to be considered. Any word in `guess_list` that is not already present is appended to it
    `max_guesses`: int
        the maximum number of attempts allowed to solve the Wordle. If None, the length of the first guess is used
    `guess_list`: list
        the words that were guessed, in the order they were played. Must contain at least one word (the None default is not usable)
    `player`: str
        stored under the 'player' key of the statistics
    `target`: str
        the word to be guessed. It is indexed at every letter position of the first guess
    `verbose`: bool
        if True, prints progress and explanation of each step. If False, prints only the guessed word at each guess.
    `return_stats`: bool
        if True, prints nothing and returns the statistics dictionary described below
    `record`: bool
        if True, writes the recorded lines to a .txt file in "solutions/" ("..._wizard_detailed.txt" if `verbose`, else "..._wizard_summary.txt")

    ------
    Returns:
    ------
    `stats_master`: dict
        returned only if `return_stats` is True (otherwise the function returns None).
        Maps each metric name to a list of two values: the player's result followed by the wizard's result
    """
    # NOTE(review): block nesting in this function was reconstructed from a copy of the file that had lost its
    # indentation -- confirm against the canonical source, in particular whether the `record_list.append` calls
    # sit inside the `return_stats` / `verbose` checks.
    stats_dict = {}

    # official_words list seems to not be 100% the same as the real game, so this adds new words to it
    # NOTE(review): this appends to the caller's list object, so the additions persist after the call
    for word in guess_list:
        if word not in word_list:
            word_list.append(word)

    guess = guess_list[0]
    first_guess = guess_list[0] # kept separately because `guess` is reassigned below; passed to wordle_wizard() at the end
    stats_dict['first_guess'] = guess
    stats_dict['target_word'] = target
    stats_dict['first_guess_vowels'] = float(count_vows_cons(guess, y_vow = True)['vows'])
    stats_dict['first_guess_consonants'] = float(count_vows_cons(guess, y_vow = True)['cons'])
    stats_dict['target_vowels'] = float(count_vows_cons(target, y_vow = True)['vows'])
    stats_dict['target_consonants'] = float(count_vows_cons(target, y_vow = True)['cons'])

    # get entropy of the first guess word and target word in the entire word_list
    # (each item yielded is indexed as item[0] = word, item[1] = value)
    # NOTE(review): 'target_entropy' is only set if `target` appears in the result -- presumably requires target to be in word_list; confirm
    for tup in get_word_entropy(word_list, word_list, normalized = True):
        if tup[0] == guess:
            stats_dict['first_guess_entropy'] = tup[1]
        if tup[0] == target:
            stats_dict['target_entropy'] = tup[1]

    guess_entropies = []
    guess_entropies.append(stats_dict['first_guess_entropy'])

    english_alphabet = "abcdefghijklmnopqrstuvwxyz"
    word_list_sorted_counts = get_letter_counts(english_alphabet, word_list, sort = "descending") # NOTE(review): not read again in this function
    wordlen = len(guess)
    letter_positions = set(i for i in range(0, wordlen))
    guess_set = set() # letters of the current guess (filled in the loop, not read afterwards)
    perfect_dict = {} # letter -> set of positions where that letter matched the target exactly
    wrong_pos_dict = {} # letter -> set of positions where that letter was tried but belongs elsewhere in the target
    wrong_pos_set = set() # letters of the current guess that are in the target but misplaced (not read afterwards)
    dont_guess_again = set() # letters that do not occur in the target at all
    guessed_words = [] # running list of guessed words
    guess_num = 0 # baseline for variable
    dont_guess_words = set() # words ruled out by a known-wrong position or an excluded letter
    incorrect_positions = [] # (letter, position) pairs known to be wrong; accumulates across guesses
    reduction_per_guess = [] # number of candidate words remaining after each guess

    if max_guesses == None: # if no value is passed, default is len(guess)
        max_guesses = wordlen
    else: # else it is the value passed
        max_guesses = max_guesses

    # per-guess counts, used for the summary statistics after the loop
    perfect_letts_per_guess = []
    wrong_pos_per_guess = []
    wrong_letts_per_guess = []
    record_list = [] # lines of output, written to file at the end if `record` is True

    while guess: # while there is any guess -- there are conditions to break it at the bottom
        guess_num += 1
        guessed_words.append(guess)

        # if drama:
        #     time.sleep(drama)
        # guess_num += 1 # each time the guess is processed

        if return_stats == False:
            if guess_num == 1:
                print("-----------------------------\n")
                record_list.append("-----------------------------\n")

        if return_stats == False:
            print(f"Guess {guess_num}: '{guess}'")
            record_list.append(f"Guess {guess_num}: '{guess}'")

        # Only the first guess can match here: later guesses are compared with the target
        # at the bottom of the previous iteration, which breaks out of the loop on a match
        if guess == target:
            stats_dict['target_guessed'] = True
            if return_stats == False:
                if guess_num == 1:
                    print(f"Congratulations! The Wordle has been solved in {guess_num} guess, that's amazingly lucky!")
                    print(f"The target word was {target}")
                    record_list.append(f"Congratulations! The Wordle has been solved in {guess_num} guess, that's amazingly lucky!")
                    record_list.append(f"The target word was '{target}'.")
            perfect_letts_per_guess.append(5) # NOTE(review): hard-coded 5 rather than wordlen
            wrong_pos_per_guess.append(0)
            wrong_letts_per_guess.append(0)
            break

        guess_set = set()
        wrong_pos_set = set()

        #### Step 2 -- ALL PERFECT
        for i in letter_positions: # number of letters in each word (current word and target word)
            guess_set.add(guess[i])
            if guess[i] not in perfect_dict:
                perfect_dict[guess[i]] = set()
            if guess[i] not in wrong_pos_dict:
                wrong_pos_dict[guess[i]] = set()

            ### EVALUATE CURRENT GUESS
            # Membership tests only: how many times a letter occurs in the target is not taken into account
            if guess[i] == target[i]: # letter == correct and position == correct
                perfect_dict[guess[i]].add(i)
            if (guess[i] != target[i] and guess[i] in target): # letter == correct and position != correct
                wrong_pos_dict[guess[i]].add(i)
                wrong_pos_set.add(guess[i])
            if guess[i] not in target: # if letter is not relevant at all
                dont_guess_again.add(guess[i])

        #### Step 3 -- ALL PERFECT
        # every letter known to be in the target (either placed correctly or misplaced)
        next_letters = set()
        for letter, positions in perfect_dict.items():
            if len(positions) > 0:
                next_letters.add(letter)
        for letter, positions in wrong_pos_dict.items():
            if len(positions) > 0:
                next_letters.add(letter)

        #### List of tuples of correct letter positions in new valid words. Eg: [('e', 2), ('a', 3)]
        perfect_letters = []
        for letter, positions in perfect_dict.items():
            for pos in positions:
                if len(positions) > 0:
                    perfect_letters.append((letter, pos))

        #### all words that have correct letters in same spots
        # NOTE(review): words_matching_correct_all and word_set are built here but not read afterwards
        words_matching_correct_all = []
        for word in word_list:
            word_set = set()
            for letter, pos in perfect_letters:
                if word[pos] == letter:
                    words_matching_correct_all.append(word)

        #### excluding words with letters in known incorrect positions
        for letter, positions in wrong_pos_dict.items():
            for pos in positions:
                if len(positions) > 0:
                    if (letter, pos) not in incorrect_positions:
                        incorrect_positions.append((letter, pos))

        # sorting lists of tuples just to make them look nice in the printout
        incorrect_positions = sorted(incorrect_positions, key = operator.itemgetter(1), reverse = False)
        perfect_letters = sorted(perfect_letters, key = operator.itemgetter(1), reverse = False)

        #### all words that have correct letters in incorrect spots -- so they can be excluded efficiently
        # print(incorrect_positions)
        for word in word_list:
            word_set = set()
            for letter, pos in incorrect_positions:
                if word[pos] == letter:
                    dont_guess_words.add(word)
        # NOTE(review): exact repeat of the loop above -- adds nothing new to the set
        for word in word_list:
            word_set = set()
            for letter, pos in incorrect_positions:
                if word[pos] == letter:
                    dont_guess_words.add(word)

        # every word containing any excluded letter is ruled out as well
        for bad_letter in dont_guess_again:
            for word in word_list:
                if (bad_letter in word and word not in dont_guess_words):
                    dont_guess_words.add(word)

        if return_stats == False:
            if verbose == True:
                print(f"Letters in correct positions:\n\t{perfect_letters}\n")
                print(f"Letters in incorrect positions:\n\t{incorrect_positions}\n")
                print (f"Letters to guess again:\n\t{sorted(list(next_letters), reverse = False)}\n")
                print(f"Letters to not guess again:\n\t{sorted(list(dont_guess_again), reverse = False)}\n") # works
                record_list.append(f"Letters in correct positions:\n\t{perfect_letters}\n")
                record_list.append(f"Letters in incorrect positions:\n\t{incorrect_positions}\n")
                record_list.append(f"Letters to guess again:\n\t{sorted(list(next_letters), reverse = False)}\n")
                record_list.append(f"Letters to not guess again:\n\t{sorted(list(dont_guess_again), reverse = False)}\n") # works

        # Returns True
        # print(A.issubset(B)) # "if everything in A is in B", returns Bool

        perfect_letts_per_guess.append(len(perfect_letters))
        wrong_pos_per_guess.append(len(incorrect_positions))
        wrong_letts_per_guess.append(len(dont_guess_again))

        # Candidate words still consistent with everything learned so far. In this function the set is
        # only used for statistics and printing -- the next guess always comes from `guess_list`
        potential_next_guesses = set()
        middle_set = set()

        if len(perfect_letters) == 0 and len(incorrect_positions) == 0: # if there are NEITHER perfect letters, NOR incorrect positions, ....
            for word in word_list:
                if word not in dont_guess_words:
                    if word not in guessed_words:
                        potential_next_guesses.add(word)
            # print(f"GUESS {guess_num} : TEST 1-1")

        # NOTE: this is a separate `if`, so its `else` branch also runs when the condition above was True
        if len(perfect_letters) == 0 and len(incorrect_positions) != 0: # if there are no perfect letters whatsoever, but there ARE incorrect positions ....
            for word in word_list:
                for incor_letter, incor_pos in incorrect_positions:
                    if word[incor_pos] != incor_letter:
                        if word not in dont_guess_words: # just in case
                            word_set = set()
                            for letter in word:
                                word_set.add(letter)
                            if next_letters.issubset(word_set):
                                if word not in guessed_words:
                                    if len(dont_guess_again) > 0:
                                        # adds the word as soon as any one excluded letter is absent from it; words that
                                        # contain an excluded letter were already filtered out via dont_guess_words
                                        for bad_letter in dont_guess_again:
                                            if bad_letter not in word:
                                                # potential_next_guesses.append(word)
                                                potential_next_guesses.add(word)
                                    else:
                                        potential_next_guesses.add(word)
            # print(f"GUESS {guess_num} : TEST 2-1")

        else:
            # first collect words containing all known letters and none of the excluded ones ...
            for word in word_list:
                if word not in dont_guess_words: # just in case
                    word_set = set()
                    for letter in word:
                        word_set.add(letter)
                    if next_letters.issubset(word_set):
                        if word not in guessed_words:
                            # print ("TEST 3-2")
                            if len(dont_guess_again) > 0:
                                for bad_letter in dont_guess_again:
                                    if bad_letter not in word:
                                        middle_set.add(word)
                            else:
                                middle_set.add(word)
            # ... then keep those that match every known correct (letter, position) pair ...
            for word in middle_set:
                dummy_list = []
                for good_lett, good_pos in perfect_letters:
                    if word[good_pos] == good_lett:
                        dummy_list.append(1)
                        if len(dummy_list) == len(perfect_letters):
                            potential_next_guesses.add(word)
            # ... and drop any that repeat a letter in a position already known to be wrong
            for word in middle_set:
                dummy_list = []
                for bad_lett, bad_pos in incorrect_positions:
                    if word[bad_pos] == bad_lett:
                        dummy_list.append(1)
                        if len(dummy_list) > 0:
                            potential_next_guesses.remove(word)
            # print(f"GUESS {guess_num} : TEST 3-1")

        if return_stats == False:
            if verbose == True:
                print(f"At this point:")
                print(f"\t{len(word_list) - len(potential_next_guesses)}, {round((len(word_list) - len(potential_next_guesses)) / len(word_list) * 100, 2)}% of total words have been eliminated, and")
                print(f"\t{len(potential_next_guesses)}, {round(len(potential_next_guesses) / len(word_list) * 100, 2)}% of total words remain possible.\n")
                # record_list.append(f"At this point:")
                record_list.append(f"\t{len(word_list) - len(potential_next_guesses)}, {round((len(word_list) - len(potential_next_guesses)) / len(word_list) * 100, 2)}% of total words have been eliminated, and")
                record_list.append(f"\t{len(potential_next_guesses)}, {round(len(potential_next_guesses) / len(word_list) * 100, 2)}% of total words remain possible.\n")

        reduction_per_guess.append(len(potential_next_guesses))

        #### Guessing next word
        # The next guess is the next recorded word: the word just played is removed from the front
        # of guess_list and the new first element is taken.
        # NOTE(review): once the played word was the last one in guess_list, `guess_list[0]` raises IndexError
        if len(potential_next_guesses) == 1:
            if return_stats == False:
                if verbose == True:
                    print(f"The only remaining possible word is:\n\t'{list(potential_next_guesses)[0]}'\n")
                    record_list.append(f"The only remaining possible word is:\n\t'{list(potential_next_guesses)[0]}'\n")
            # guess = list(potential_next_guesses)[0]
            del guess_list[0]
            # print (guess_list)
            guess = guess_list[0]
            guess_entropies.append(get_word_entropy([guess], word_list, normalized = True, ascending = False)[0][1])
        else:
            best_next_guesses = list(potential_next_guesses)
            word_ratings = get_word_entropy(best_next_guesses, word_list, normalized = True, ascending = False) # "internal" ratings
            del guess_list[0]
            # print (guess_list)
            guess = guess_list[0]
            guess_entropies.append(get_word_entropy([guess], word_list, normalized = True, ascending = False)[0][1])

            if return_stats == False:
                if verbose == True:
                    if len(word_ratings) <= 40:
                        print(f"All potential next guesses:\n\t{word_ratings}\n")
                        print(f"Words guessed so far:\n\t{guessed_words}.\n")
                        record_list.append(f"Potential next guesses:\n\t{word_ratings}\n")
                        record_list.append(f"Words guessed so far:\n\t{guessed_words}.\n")
                    else:
                        print(f"The top 40 potential next guesses are:\n\t{word_ratings[:40]}\n")
                        print(f"Words guessed so far:\n\t{guessed_words}.\n")
                        record_list.append(f"The top 40 potential next guesses are::\n\t{word_ratings[:40]}\n")
                        record_list.append(f"Words guessed so far:\n\t{guessed_words}.\n")

        #### Guess has now been made -- what to do next
        if guess_num == max_guesses: # if at max guesses allowed
            # `guess` is already the next recorded word here; it is stored but guess_num is not incremented
            guessed_words.append(guess)
            stats_dict['target_guessed'] = False
            if return_stats == False:
                if verbose == True:
                    # print("-----------------------------\n")
                    print(f"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.\n")
                    print(f"The target word was '{target}'. Better luck next time!\n")
                    print("-----------------------------\n")
                    record_list.append(f"Unfortunately, the Wordle could not be solved in {max_guesses} guesses.\n")
                    record_list.append(f"The target word was '{target}'. Better luck next time!\n")
                    record_list.append("-----------------------------\n")
                else:
                    print(f"\nUnfortunately, the Wordle could not be solved in {max_guesses} guesses.")
                    print(f"The target word was '{target}'. Better luck next time!\n")
                    record_list.append(f"\nUnfortunately, the Wordle could not be solved in {max_guesses} guesses.")
                    record_list.append(f"The target word was '{target}'. Better luck next time!\n")
            break
        else: # if not at max guesses yet allowed
            # stats_dict['target_guessed'] = False
            if return_stats == False:
                if verbose == True:
                    print(f"Next guess:\n\t'{guess}'")
                    print("\n-----------------------------")
                    record_list.append(f"Next guess: '{guess}'")
                    record_list.append("-----------------------------\n")

            # the upcoming guess is checked right away, so a winning guess is counted here and never re-enters the loop
            if guess == target:
                guess_num += 1
                guessed_words.append(guess)
                stats_dict['target_guessed'] = True
                if return_stats == False:
                    print(f"Guess {guess_num}: '{guess}'\n")
                    print(f"Congratulations! The Wordle has been solved in {guess_num} guesses!")
                    record_list.append(f"Guess {guess_num}: '{guess}'\n")
                    record_list.append(f"Congratulations! The Wordle has been solved in {guess_num} guesses!")
                    if max_guesses - guess_num == 0:
                        print(f"Lucky! It was the last guess.")
                        record_list.append(f"Lucky! It was the last guess.")
                    else:
                        print(f"There were still {max_guesses - guess_num} guesses remaining.")
                        record_list.append(f"There were still {max_guesses - guess_num} guesses remaining.")
                if return_stats == False:
                    # stats_dict['target_guessed'] = True
                    print(f"\nThe target word was '{target}'.")
                    print("\n-----------------------------")
                    record_list.append(f"The target word was '{target}'.")
                    record_list.append("-----------------------------")
                break

    #### STATS STUFF
    mid_guesses_vows = 0
    mid_guesses_cons = 0
    avg_perf_letters = 0
    avg_wrong_pos_letters = 0
    avg_wrong_letters = 0

    # vowel / consonant totals over every guessed word
    for i, word in enumerate(guessed_words):
        mid_guesses_vows += count_vows_cons(word, y_vow = True)['vows']
        mid_guesses_cons += count_vows_cons(word, y_vow = True)['cons']

    # letter-feedback totals over all guesses except the last one
    for i in range(0, len(guessed_words) - 1):
        avg_perf_letters += perfect_letts_per_guess[i]
        avg_wrong_pos_letters += wrong_pos_per_guess[i]
        avg_wrong_letters += wrong_letts_per_guess[i]

    stats_dict['mid_guesses_avg_vows'] = float(round(mid_guesses_vows / len(guessed_words), 2))
    stats_dict['mid_guesses_avg_cons'] = float(round(mid_guesses_cons / len(guessed_words), 2))

    # NOTE(review): the three avg_* variables hold running totals, and np.mean() of a single number is that number,
    # so the values stored here are sums rather than per-guess averages
    stats_dict['avg_perf_letters'] = float(round(np.mean(avg_perf_letters), 2))
    stats_dict['avg_wrong_pos_letters'] = float(round(np.mean(avg_wrong_pos_letters), 2))
    stats_dict['avg_wrong_letters'] = float(round(np.mean(avg_wrong_letters), 2))

    # average number of words remaining after each guess -- the higher this is, the luckier the person got (the lower, the more guesses it took)
    # (reduction_per_guess is empty when the first guess was the target)
    stats_dict['avg_remaining'] = float(round(np.mean(reduction_per_guess), 2))

    # avg entropy of each guessed word relative to all other words possible at that moment -- this should consistently be 100 for the algorithm, but will be different for user
    if len(guess_entropies) > 1: # in case of guessing it correctly on the first try
        sum_entropies = 0
        for entropy in guess_entropies:
            sum_entropies += entropy
        average_entropy = float(round(sum_entropies / len(guess_entropies), 2))
        stats_dict['avg_intermediate_guess_entropy'] = average_entropy
    else:
        stats_dict['avg_intermediate_guess_entropy'] = float(100)

    # stats_dict['bias'] = bias

    if record == True:
        if verbose == True:
            with open(f"solutions/{guessed_words[0]}_{target}_wizard_detailed.txt", "w") as fout:
                for line in record_list:
                    fout.write(line + "\n") # write each line of list of printed text to .txt file
        else:
            with open(f"solutions/{guessed_words[0]}_{target}_wizard_summary.txt", "w") as fout:
                for line in record_list:
                    fout.write(line + "\n") # write

    # if guess_num <= len(guess):
    # NOTE(review): threshold is hard-coded to 6 rather than max_guesses, and does not depend on 'target_guessed'
    if guess_num <= 6:
        stats_dict['valid_success'] = True
    else:
        stats_dict['valid_success'] = False

    stats_dict['player'] = player
    stats_dict['num_guesses'] = float(guess_num)

    # Run the solver on the same puzzle, starting from the player's first guess, as the baseline.
    # NOTE(review): the code below assumes wordle_wizard() returns its stats dict (with at least 'bias' and
    # 'num_guesses' keys) for the given `return_stats` value -- confirm, and confirm what it returns when return_stats is False
    wizard_dict = wordle_wizard(word_list = word_list, max_guesses = max_guesses,
                                guess = first_guess, target = target, bias = 'entropy',
                                random_guess = False, random_target = False,
                                verbose = False, drama = 0, return_stats = return_stats, record = False)
    wizard_dict['player'] = "wizard"
    del wizard_dict['bias'] # leftover from the wordle_wizard() output stats_dict, but isn't relevant anymore
    wizard_dict['luck'] = 0
    wizard_dict['expected_guesses'] = wizard_dict['num_guesses']

    # the wizard's guess count is used as the expected number of guesses for this puzzle
    stats_dict['expected_guesses'] = wizard_dict['num_guesses']
    expected_guesses = wizard_dict['num_guesses']
    # stats_dict['luck'] = round((1 - (guess_num / expected_guesses)) * (stats_dict['avg_intermediate_guess_entropy'] / 100), 2)
    # luck = 1 - (player guesses / expected guesses) * (average guess entropy / 100), rounded to 2 places
    stats_dict['luck'] = round((1 - ((guess_num / expected_guesses)) * (stats_dict['avg_intermediate_guess_entropy'] / 100)), 2)

    # Merge both result sets: each metric maps to [player value, wizard value]
    stats_master = {}
    for metric, result in stats_dict.items():
        if metric in stats_master:
            stats_master[metric].append(result)
        else:
            stats_master[metric] = []
            stats_master[metric].append(result)
    # a wizard metric that the player stats do not contain raises KeyError here
    for metric, result in wizard_dict.items():
        stats_master[metric].append(result)

    if return_stats == True:
        return stats_master
def convert_row(df, row):
    """
    Converts row of passed pandas dataFrame object into usable inputs for `compare_wordle()` function.
    The row is read as: player name, target word, then the guessed words in order. Missing values are skipped.
    The passed dataFrame itself is not modified.

    ------
    Parameters:
    ------
    `df`: pandas df object
        pandas dataFrame object
    `row`: int
        index label of the row to convert (passed to `df.loc`)

    ------
    Returns:
    ------
    3-tuple containing, in this order:
    `player`: str
        name of player of passed iteration of the puzzle, lowercased
    `target`: str
        target word of passed iteration of the puzzle, lowercased
    `guess_list`: list
        list of words guessed in this playthrough of passed iteration of the puzzle, lowercased
    """
    # Fill missing cells on a copy of this one row only, so the caller's dataFrame keeps its NaNs
    row_values = df.loc[row, :].fillna("none").tolist()

    player = row_values[0]
    target = row_values[1]
    # everything after the first two cells is a guess; unused guess slots show up as "none"
    guess_list = [word for word in row_values[2:] if word != "none"]

    lower_player = player.lower()
    lower_target = target.lower()
    lower_guess_list = [word.lower() for word in guess_list]

    return (lower_player, lower_target, lower_guess_list)
def create_compared_df(player_df, to_csv: bool = False, show_shapes: bool = False):
    """
    Creates master df of player wordle scores compared to how wordle_wizard would perform on the same puzzles.
    Puzzles for which `compare_wordle()` raises an error are skipped.

    Parameters:
    -----
    `player_df`: Pandas dataFrame object
        df of player scores of wordle puzzles
    `to_csv`: bool
        If True, writes returned df to 'compared_data/players_compared.csv'
    `show_shapes`: bool
        If True, prints shape of new df before and after deleting duplicate rows (created by wordle_wizard running the same puzzles multiple times)

    Returns:
    -----
    `df_master`: Pandas dataFrame object
        df of player scores and wordle_wizard scores of wordle puzzles. Empty (but with all expected columns) if no puzzle could be processed
    """
    # Columns in a more logical order (for viewing)
    column_order = ['first_guess', 'target_word', 'player', 'num_guesses', 'expected_guesses', 'luck', 'first_guess_vowels', 'first_guess_consonants',
                    'target_vowels', 'target_consonants', 'first_guess_entropy', 'target_entropy',
                    'target_guessed', 'mid_guesses_avg_vows', 'mid_guesses_avg_cons', 'avg_perf_letters',
                    'avg_wrong_pos_letters', 'avg_wrong_letters', 'avg_remaining', 'avg_intermediate_guess_entropy',
                    'valid_success']

    stats_master = {}
    excepts = [] # guess lists of the puzzles that were skipped -- handy to inspect when debugging

    for row in player_df.index:
        player, target_word, guess_list = convert_row(player_df, row)

        try:
            # compare_wordle() consumes the list it is given, so it gets its own copy
            complete = compare_wordle(word_list = official_words, max_guesses = 6,
                                      guess_list = list(guess_list), player = player, target = target_word,
                                      verbose = True, return_stats = True, record = False)
        except Exception:
            # best effort: a puzzle that cannot be replayed is skipped, not fatal
            # (Exception rather than a bare except, so Ctrl-C / SystemExit still stop the run)
            excepts.append(guess_list)
        else:
            for metric, results in complete.items():
                stats_master.setdefault(metric, []).extend(results)

    if stats_master:
        df_master = pd.DataFrame(stats_master)[column_order]
    else:
        # nothing could be processed -- return an empty frame with the expected columns
        df_master = pd.DataFrame(columns = column_order)

    if show_shapes:
        print(df_master.shape) # check shape before deleting dups

    # Delete duplicate rows (some created by process)
    df_master = df_master.drop_duplicates()

    if to_csv:
        df_master.to_csv('compared_data/players_compared.csv') # write new data to csv

    if show_shapes:
        print(df_master.shape) # check shape after deleting dups

    return df_master