# Captured from a Hugging Face Space page (status shown at capture time: Sleeping).
from gensim.models import Word2Vec
class word_2_vec_aggregator():
    """
    A thin wrapper around gensim used for creating a word2vec model.
    """

    def get_model(self, list_of_sentences):
        """
        Train a Word2Vec model on the multi-word sentences of the input.

        Each sentence is split on single spaces. Empty tokens (produced by
        leading, trailing or repeated spaces) are discarded, and sentences
        left with fewer than two tokens are skipped.

        :param list_of_sentences: iterable of strings whose words are
            separated by spaces
        :return: the trained gensim Word2Vec model
        :raises RuntimeError: if no sentence with at least two tokens remains,
            since there would be nothing to build a vocabulary from
        """
        tokenized_sentences = []
        for sentence in list_of_sentences:
            # split(" ") yields "" for leading/trailing/repeated spaces; with
            # min_count=1 those empty strings would become vocabulary words.
            tokens = [token for token in sentence.split(" ") if token]
            # Skip unigrams (and blank sentences)
            if len(tokens) < 2:
                continue
            tokenized_sentences.append(tokens)

        if not tokenized_sentences:
            # Fail early with an explicit message instead of handing gensim
            # an empty corpus.
            raise RuntimeError(
                "cannot train Word2Vec: no sentence with at least two words was provided"
            )

        # min_count=1 keeps every word; window=5 is the context size. The
        # vector size is not set here, so gensim's default applies.
        model = Word2Vec(min_count=1, window=5)
        model.build_vocab(tokenized_sentences)  # prepare the model vocabulary
        model.train(
            tokenized_sentences,
            total_examples=model.corpus_count,
            epochs=model.epochs,
        )  # train word vectors
        return model