agalma / autocomplete.py
Mark7549's picture
updated autocomplete for nearest neighbours
51778ca
raw
history blame
943 Bytes
import pickle
import gzip
def get_unique_words(corpus_filename):
    """
    Get a sorted list of the unique words in a corpus file.

    The file is read as UTF-8, one line at a time, and every line is split
    on runs of whitespace; each resulting token counts as a word.

    Args:
        corpus_filename: Path of the text corpus to read.

    Returns:
        A list containing each distinct token exactly once, in ascending
        (code point) order. An empty file yields an empty list.
    """
    unique_words = set()
    with open(corpus_filename, 'r', encoding='utf-8') as file:
        for line in file:
            # split() without arguments already ignores leading and trailing
            # whitespace, so a separate strip() is not needed.
            unique_words.update(line.split())
    # Iteration order of a set of strings is not stable between interpreter
    # runs (string hashes are randomised), so sort to make the result
    # reproducible.
    return sorted(unique_words)
def save_compressed_word_list(words, filename):
    """
    Pickle a word list and write it to a gzip-compressed file.

    Args:
        words: The object to store (normally a list of words).
        filename: Destination passed to ``gzip.open``; it is opened in
            binary write mode, so existing content is replaced.
    """
    with gzip.open(filename, mode='wb') as compressed_out:
        pickle.dump(words, compressed_out)
def load_compressed_word_list(filename):
    """
    Read a pickled word list back from a gzip-compressed file.

    Args:
        filename: Source passed to ``gzip.open`` in binary read mode.

    Returns:
        Whatever object was pickled into the file (normally a list of words).
    """
    # NOTE: unpickling can run arbitrary code, so only load files that come
    # from a trusted source.
    with gzip.open(filename, mode='rb') as compressed_in:
        word_list = pickle.load(compressed_in)
    return word_list
def get_autocomplete(input_word=" ", all_words=" "):
    """
    Collect the words that begin with a given prefix.

    Args:
        input_word: Prefix to match; defaults to a single space.
        all_words: Iterable of candidate words, scanned in order; defaults
            to the one-character string " ".

    Returns:
        A list of the candidates for which ``startswith(input_word)`` is
        true, in the order they were encountered.
    """
    matches = []
    for candidate in all_words:
        if candidate.startswith(input_word):
            matches.append(candidate)
    return matches