emiliosheinz committed on
Commit
6b02e3d
1 Parent(s): 43d8e37

create app.py with static string comparison

Browse files
Files changed (1) hide show
  1. app.py +22 -0
app.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Print a semantic similarity score for two example sentences.

The sentences are encoded with the pretrained multilingual DistilBERT
encoder, each sentence is reduced to one vector by mean pooling over its
tokens, and the score is the cosine similarity of the two vectors.

NOTE: ``distilbert-base-multilingual-cased`` is a base checkpoint without a
fine-tuned classification head. Loading it through
``AutoModelForSequenceClassification`` attaches a randomly initialised head,
so its softmax output is not a similarity measure and changes between runs.
Only the pretrained encoder weights are used here.
"""

import torch

MODEL_NAME = "distilbert-base-multilingual-cased"
MAX_LENGTH = 250


def mean_pool(token_embeddings, attention_mask):
    """Average token embeddings over the non-padding positions.

    Args:
        token_embeddings: Float tensor of shape (batch, seq_len, hidden).
        attention_mask: Tensor of shape (batch, seq_len) holding 1 for real
            tokens and 0 for padding.

    Returns:
        Tensor of shape (batch, hidden) with one vector per sequence.
    """
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    # Clamp so a row made only of padding yields zeros instead of NaN.
    counts = mask.sum(dim=1).clamp(min=1e-9)
    return summed / counts


def cosine_score(vec_a, vec_b):
    """Return the cosine similarity of two 1-D tensors as a Python float."""
    return torch.nn.functional.cosine_similarity(vec_a, vec_b, dim=0).item()


def sentence_similarity(sentence1, sentence2, tokenizer, model):
    """Return the cosine similarity between the embeddings of two sentences.

    Args:
        sentence1: First sentence.
        sentence2: Second sentence.
        tokenizer: Hugging Face tokenizer matching ``model``.
        model: Encoder whose output exposes ``last_hidden_state``.

    Returns:
        A float in [-1.0, 1.0]; higher means more similar.
    """
    # Encode the sentences as a batch of two independent sequences (not as a
    # single sentence pair) so that each one gets its own embedding.
    inputs = tokenizer(
        [sentence1, sentence2],
        padding=True,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors="pt",
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        hidden_states = model(**inputs).last_hidden_state
    embeddings = mean_pool(hidden_states, inputs["attention_mask"])
    return cosine_score(embeddings[0], embeddings[1])


def main():
    """Load the encoder and print the similarity of the example sentences."""
    # Imported here so the tensor helpers above stay importable without
    # pulling in transformers.
    from transformers import AutoModel, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModel.from_pretrained(MODEL_NAME)
    model.eval()

    # example sentences
    sentence1 = "O Brasil é o maior país da América do Sul"
    sentence2 = "A Argentina é o segundo maior país da América do Sul"

    similarity_score = sentence_similarity(sentence1, sentence2, tokenizer, model)

    print("Similarity score:", similarity_score)


if __name__ == "__main__":
    main()