sabduh77 committed on
Commit
a56fd03
1 Parent(s): 7ff87c5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -0
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Zero-shot topic classification with sentence embeddings.

Embeds one sentence and a list of candidate labels with the
'deepset/sentence_bert' model, then ranks the labels by cosine similarity
to the sentence and prints them from most to least similar.
"""

# NOTE: the notebook-only line `!pip install transformers` was removed: it is
# IPython shell-escape syntax and a SyntaxError in a plain .py file. Install
# the dependency outside the script instead (e.g. list `transformers` and
# `torch` in requirements.txt).
import torch
from torch.nn import functional as F
from transformers import AutoTokenizer, AutoModel


def masked_mean(token_embeddings, attention_mask):
    """Average token embeddings over the sequence axis, skipping padding.

    Args:
        token_embeddings: tensor indexed as (batch, sequence, hidden).
        attention_mask: tensor indexed as (batch, sequence); 1 marks a real
            token and 0 marks padding.

    Returns:
        A (batch, hidden) tensor holding one pooled vector per input row.
    """
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp so a row made only of padding cannot trigger a division by zero.
    token_counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / token_counts


tokenizer = AutoTokenizer.from_pretrained('deepset/sentence_bert')
model = AutoModel.from_pretrained('deepset/sentence_bert')

sentence = 'Who are you voting for in 2020?'
labels = ['business', 'art & culture', 'politics']

# Encode the sentence and every label as one batch. `padding=True` pads to
# the longest item in the batch and replaces the deprecated
# `pad_to_max_length=True` argument.
inputs = tokenizer([sentence] + labels,
                   return_tensors='pt',
                   padding=True)
input_ids = inputs['input_ids']
attention_mask = inputs['attention_mask']

# Inference only: skip building the autograd graph.
with torch.no_grad():
    output = model(input_ids, attention_mask=attention_mask)[0]

# Pool over real tokens only. A plain `.mean(dim=1)` would also average the
# padded positions, which dilutes the representation of the shorter labels.
pooled = masked_mean(output, attention_mask)
sentence_rep = pooled[:1]
label_reps = pooled[1:]

# now find the labels with the highest cosine similarities to
# the sentence
similarities = F.cosine_similarity(sentence_rep, label_reps)
closest = similarities.argsort(descending=True)
# Convert to plain Python values so list indexing and printing do not depend
# on 0-d tensor behaviour.
for ind in closest.tolist():
    print(f'label: {labels[ind]} \t similarity: {similarities[ind].item()}')