import pickle | |
from autocomplete import save_compressed_word_list | |
import json | |
def compress_words(words):
    """
    Read a word list from a text file and save it in compressed form.

    Each line of the file is treated as one word. The lines are handed to
    ``compress_word_list``, which strips surrounding whitespace (including
    the trailing newline) from every entry and saves the result via
    ``save_compressed_word_list``.

    Args:
        words: Path of a text file containing one word per line. Despite
            the parameter name, this is a path, not a list of words.
    """
    # Decode explicitly as UTF-8: without an ``encoding`` argument, open()
    # falls back to the platform default, so non-ASCII words could be
    # mis-decoded (or raise) depending on the machine's locale.
    with open(words, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    # Stripping and saving is shared with the in-memory entry point rather
    # than duplicated here.
    compress_word_list(lines)
def compress_word_list(words, output_path='all_lemmas.pkl.gz'):
    """
    Strip surrounding whitespace from each word and save the resulting list.

    Args:
        words: Iterable of strings. Each entry is passed through
            ``str.strip()``; entries are otherwise kept as-is and in order.
        output_path: Destination handed to ``save_compressed_word_list``.
            Defaults to ``'all_lemmas.pkl.gz'``, the value that was
            previously hard-coded, so existing callers are unaffected.
    """
    cleaned_words = [word.strip() for word in words]
    # Save the compressed words
    save_compressed_word_list(cleaned_words, output_path)
def main():
    """
    Build the compressed lemma list from ``lsj_dict.json``.

    Loads the JSON file from the current working directory, takes its
    top-level keys as the lemmas, and passes them to
    ``compress_word_list`` for stripping and saving.
    """
    # Use a context manager so the file handle is closed deterministically,
    # and decode as UTF-8 explicitly instead of relying on the platform
    # default encoding.
    with open('lsj_dict.json', 'r', encoding='utf-8') as f:
        lemma_dict = json.load(f)
    # Get all lemmas (the top-level JSON value must be an object)
    all_lemmas = list(lemma_dict.keys())
    # Compress words
    compress_word_list(all_lemmas)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()