Vrk committed on
Commit
72322f6
1 Parent(s): f95c696

Upload Embeddings.py

Browse files
Files changed (1) hide show
  1. Embeddings.py +27 -0
Embeddings.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+
4
def load_glove_embeddings(embeddings_file):
    """Load word embeddings from a GloVe-style text file.

    Each non-blank line is split on whitespace: the first field is taken as
    the token and the remaining fields as the components of its vector.

    Args:
        embeddings_file: Path to the embeddings text file (opened as UTF-8).

    Returns:
        dict mapping each token to a 1-D ``float32`` NumPy array. If a token
        occurs on more than one line, the last occurrence wins.

    Raises:
        ValueError: If a vector component cannot be converted to a float.
    """
    embeddings = {}
    with open(embeddings_file, "r", encoding="utf8") as fp:
        for line in fp:
            values = line.split()
            # Blank or whitespace-only lines (e.g. a trailing empty line)
            # carry no token; indexing values[0] on them would raise
            # IndexError, so skip them.
            if not values:
                continue
            embeddings[values[0]] = np.asarray(values[1:], dtype="float32")
    return embeddings
14
+
15
def make_embeddings_matrix(embeddings, word_index, embedding_dim):
    """Create embeddings matrix to use in Embedding layer.

    Args:
        embeddings: Mapping from token to its embedding vector.
        word_index: Mapping from token to the integer row that token
            occupies in the matrix.
        embedding_dim: Number of columns in the matrix.

    Returns:
        2-D NumPy array (``np.zeros`` default dtype) with ``embedding_dim``
        columns. Row ``i`` holds the vector of the token mapped to ``i``;
        rows for tokens absent from ``embeddings`` stay all-zero. The matrix
        has ``len(word_index)`` rows, or ``max(index) + 1`` rows when the
        indices extend past that (e.g. a 1-based vocabulary).
    """
    # Size by the largest index as well as the vocabulary size: indices that
    # are not exactly 0..len-1 would otherwise overflow the matrix and raise
    # IndexError. ``default=-1`` keeps an empty vocabulary at zero rows.
    highest_index = max(word_index.values(), default=-1)
    num_rows = max(len(word_index), highest_index + 1)
    embedding_matrix = np.zeros((num_rows, embedding_dim))
    for word, i in word_index.items():
        embedding_vector = embeddings.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix
23
+
24
def get_embeddings(embedding_file_path, tokenizer, embedding_dim):
    """Build an embedding matrix for a tokenizer from an embeddings file.

    Loads the vectors with ``load_glove_embeddings`` and arranges them with
    ``make_embeddings_matrix`` using ``tokenizer.token_to_index`` as the
    token-to-row mapping.

    Args:
        embedding_file_path: Path passed to ``load_glove_embeddings``.
        tokenizer: Object exposing a ``token_to_index`` mapping.
        embedding_dim: Vector width passed to ``make_embeddings_matrix``.

    Returns:
        The matrix produced by ``make_embeddings_matrix``.
    """
    vectors_by_token = load_glove_embeddings(embeddings_file=embedding_file_path)
    return make_embeddings_matrix(
        embeddings=vectors_by_token,
        word_index=tokenizer.token_to_index,
        embedding_dim=embedding_dim,
    )