import pickle
import gzip


def get_unique_words(corpus_filename):
    """Return the unique whitespace-separated words found in a corpus file.

    The file is read as UTF-8, one line at a time, so the whole corpus never
    has to be held in memory at once.

    Args:
        corpus_filename: Path of the text file to read.

    Returns:
        A list containing each distinct word exactly once, in sorted order so
        that the result is reproducible from one run to the next.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    unique_words = set()
    with open(corpus_filename, 'r', encoding='utf-8') as file:
        for line in file:
            # split() with no argument already discards leading/trailing
            # whitespace (including the newline) and yields nothing for
            # blank lines.
            unique_words.update(line.split())
    # A set has no stable iteration order for strings (hash randomisation),
    # so sort to make the returned list deterministic.
    return sorted(unique_words)


def save_compressed_word_list(words, filename):
    """Pickle ``words`` and write the result to a gzip-compressed file.

    Args:
        words: The object to store (normally a list of words).
        filename: Path of the gzip file to create or overwrite.
    """
    with gzip.open(filename, 'wb') as file:
        pickle.dump(words, file)


def load_compressed_word_list(filename):
    """Load an object previously written by ``save_compressed_word_list``.

    Args:
        filename: Path of the gzip-compressed pickle file to read.

    Returns:
        The unpickled object stored in the file.

    Warning:
        Unpickling can execute arbitrary code. Only call this on files that
        come from a trusted source.
    """
    with gzip.open(filename, 'rb') as file:
        # SECURITY: pickle.load must never be used on untrusted data.
        return pickle.load(file)


def get_autocomplete(input_word=" ", all_words=" "):
    """Return the words from ``all_words`` that start with ``input_word``.

    Args:
        input_word: Prefix to match. Matching is case-sensitive; an empty
            string matches every word.
        all_words: Iterable of candidate words. Note that a plain string is
            iterated character by character, which is what happens with the
            default value.

    Returns:
        A list of the matching words, in the order they appear in
        ``all_words``.
    """
    return [word for word in all_words if word.startswith(input_word)]