shivanis14 committed on
Commit
9fe408e
1 Parent(s): b37f3f6

Upload 2 files

Browse files
Files changed (2) hide show
  1. create_embeddings.py +23 -0
  2. embeddings.pkl +3 -0
create_embeddings.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Compute embeddings for the titles in titles.txt and cache them on disk.

When run (or imported), this script checks whether ``embeddings.pkl`` exists
in the current working directory. If it does not, every line of
``titles.txt`` is read and stripped, the resulting list is passed to
``find_embedding``, and a dict with the keys ``'sentences'`` (the titles) and
``'embeddings'`` (the value returned by ``find_embedding``) is pickled to
``embeddings.pkl``.

If ``embeddings.pkl`` already exists nothing is computed or loaded and
``embeddings_titles`` stays an empty list.
"""
from calc_cosine_similarity import find_cosine_similarity, find_embedding, find_relevant_file_paths
import os
import pickle

TITLES_PATH = 'titles.txt'
EMBEDDINGS_PATH = 'embeddings.pkl'

embeddings_titles = []
if not os.path.exists(EMBEDDINGS_PATH):
    # The cache file is absent: build the embeddings from titles.txt.
    # One title per line; blank lines are kept (as empty strings) so that
    # list positions still correspond to line positions in titles.txt.
    with open(TITLES_PATH, 'r') as file:
        titles = [line.strip() for line in file]
    print("Created a list of titles")

    embeddings_titles = find_embedding(titles)

    # Save the titles together with their embeddings to embeddings.pkl.
    data = {
        'sentences': titles,
        'embeddings': embeddings_titles,
    }

    # Write to a temporary file and rename it into place. The existence of
    # embeddings.pkl is what marks the cache as valid, so a dump that fails
    # halfway must never leave a truncated file under the final name.
    tmp_path = EMBEDDINGS_PATH + '.tmp'
    try:
        with open(tmp_path, 'wb') as f:
            pickle.dump(data, f)
        os.replace(tmp_path, EMBEDDINGS_PATH)
    finally:
        # After a successful replace the temp file is gone; this only
        # cleans up when the dump or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
embeddings.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:609ad926ecada68e145ec5ec3981c9358986f0c0d64f607c8ee70003e435bcc1
3
+ size 7018423