import gzip
import pickle
def get_unique_words(corpus_filename):
    """Return the unique whitespace-separated words found in a corpus file.

    The file is opened as UTF-8 text and consumed line by line, so the
    whole corpus is never loaded into memory at once.

    Args:
        corpus_filename: Path of the text file to read.

    Returns:
        A list holding each distinct word exactly once. Words are
        collected in a set, so the order of the list is arbitrary.
        Matching is case-sensitive and punctuation is kept as part of
        the word, because lines are only split on whitespace.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    unique_words = set()
    with open(corpus_filename, 'r', encoding='utf-8') as file:
        for line in file:
            # split() without arguments already ignores leading/trailing
            # whitespace (including the newline) and collapses runs.
            unique_words.update(line.split())
    return list(unique_words)
def save_compressed_word_list(words, filename):
    """Pickle a word list and write it to a gzip-compressed file.

    Args:
        words: The object to store (normally a list of words). It is
            serialized with ``pickle.dump`` using the default protocol.
        filename: Path of the file to write. An existing file is
            overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        pickle.PicklingError: If ``words`` cannot be pickled.
    """
    # Binary mode is required: pickle emits bytes, gzip compresses them.
    with gzip.open(filename, 'wb') as file:
        pickle.dump(words, file)
def load_compressed_word_list(filename):
    """Load a pickled word list from a gzip-compressed file.

    Args:
        filename: Path of a gzip file containing a single pickled
            object.

    Returns:
        The unpickled object stored in the file.

    Raises:
        OSError: If the file cannot be opened or is not valid gzip data.
        pickle.UnpicklingError: If the decompressed content is not a
            valid pickle stream.
    """
    with gzip.open(filename, 'rb') as file:
        # SECURITY: unpickling can execute arbitrary code. Only call this
        # on files from a trusted source, never on user-supplied uploads.
        return pickle.load(file)
def get_autocomplete(input_word=" ", all_words=" "):
    """Return the words that start with the given prefix.

    Args:
        input_word: Prefix to match. Matching is case-sensitive. An empty
            string matches every word. Defaults to a single space.
        all_words: Iterable of candidate words. Defaults to the string
            ``" "``, which iterates as one single-space "word"; callers
            are expected to pass a real word list.

    Returns:
        A new list with every element of ``all_words`` that starts with
        ``input_word``, in the order they appear in ``all_words``.
    """
    return [word for word in all_words if word.startswith(input_word)]