File size: 987 Bytes
32a03a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from gensim.models import Word2Vec


class word_2_vec_aggregator():
    """
    A thin wrapper class around gensim used for creating a word 2 vec model
    """

    def get_model(self, list_of_sentences):
        """
        Train a Word2Vec model on the multi-word sentences of the input.

        Every sentence is split on single spaces. Empty tokens produced by
        leading, trailing or repeated spaces are dropped so the empty string
        never ends up as a vocabulary entry. Sentences left with fewer than
        two tokens (unigrams, blank strings) are skipped.

        :param list_of_sentences: iterable of sentence strings
        :return: the trained model
        :raises RuntimeError: if no sentence with at least two tokens remains
        """

        tokenized_sentences = []

        for sentence in list_of_sentences:
            # Filter out the "" entries that split(" ") emits for padded or
            # doubly-spaced input.
            tokens = [token for token in sentence.split(" ") if token]

            # Skip unigrams (decided on real tokens, so " word " counts as one)
            if len(tokens) < 2:
                continue

            tokenized_sentences.append(tokens)

        if not tokenized_sentences:
            # Fail here with a message that points at the input rather than
            # deep inside the library.
            # NOTE(review): RuntimeError chosen because gensim is expected to
            # reject training on an empty vocabulary with the same type -- confirm.
            raise RuntimeError(
                "no multi-word sentences to train on; "
                "Word2Vec needs a non-empty vocabulary"
            )

        # min_count=1 keeps every token, window=5 is the context size; all
        # other hyperparameters are left at the library defaults.
        model = Word2Vec(min_count=1, window=5)
        model.build_vocab(tokenized_sentences)  # prepare the model vocabulary
        # NOTE(review): presumably resets a flag that some gensim releases
        # check before allowing train() -- confirm it is still needed.
        model.model_trimmed_post_training = False
        model.train(
            tokenized_sentences,
            total_examples=model.corpus_count,
            epochs=model.epochs,
        )  # train word vectors

        return model