luisespinosa committed on
Commit
e16bf00
1 Parent(s): c97f477

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +33 -0
README.md CHANGED
@@ -11,4 +11,37 @@ def preprocess(text):
11
  t = 'http' if t.startswith('http') else t
12
  new_text.append(t)
13
  return " ".join(new_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ```
 
11
  t = 'http' if t.startswith('http') else t
12
  new_text.append(t)
13
  return " ".join(new_text)
14
+
15
+ def get_embedding(text):
16
+ text = preprocess(text)
17
+ encoded_input = tokenizer(text, return_tensors='pt')
18
+ features = model(**encoded_input)
19
+ features = features[0].detach().numpy()
20
+ features_mean = np.mean(features[0], axis=0)
21
+ return features_mean
22
+
23
+ query = "Acabo de pedir pollo frito 🐣" #spanish
24
+
25
+ tweets = ["We had a great time! ⚽️", # english
26
+ "We hebben een geweldige tijd gehad! ⛩", # dutch
27
+ "Nous avons passé un bon moment! 🎥", # french
28
+ "Ci siamo divertiti! 🍝"] # italian
29
+
30
+ d = defaultdict(int)
31
+ for tweet in tweets:
32
+ sim = 1-cosine(get_embedding(query),get_embedding(tweet))
33
+ d[tweet] = sim
34
+
35
+ print('Most similar to: ',query)
36
+ print('----------------------------------------')
37
+ for idx,x in enumerate(sorted(d.items(), key=lambda x:x[1], reverse=True)):
38
+ print(idx+1,x[0])
39
+ ```
40
+ ```
41
+ Most similar to: Acabo de pedir pollo frito 🐣
42
+ ----------------------------------------
43
+ 1 Ci siamo divertiti! 🍝
44
+ 2 Nous avons passé un bon moment! 🎥
45
+ 3 We had a great time! ⚽️
46
+ 4 We hebben een geweldige tijd gehad! ⛩
47
  ```