cointegrated committed on
Commit
2fa46f3
1 Parent(s): 4f6879b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +24 -1
README.md CHANGED
@@ -46,4 +46,27 @@ set | ROC AUC
46
  detox | 0.857112
47
  paraphraser | 0.858465
48
  rupaws_qqp | 0.859195
49
- rupaws_wiki | 0.906121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  detox | 0.857112
47
  paraphraser | 0.858465
48
  rupaws_qqp | 0.859195
49
+ rupaws_wiki | 0.906121
50
+
51
+ Example usage:
52
+
53
+ ```Python
54
+ import torch
55
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
56
+
57
+ model = AutoModelForSequenceClassification.from_pretrained('SkolkovoInstitute/ruRoberta-large-paraphrase-v1')
58
+ tokenizer = AutoTokenizer.from_pretrained('SkolkovoInstitute/ruRoberta-large-paraphrase-v1')
59
+
60
+ def get_similarity(text1, text2):
61
+ """ Predict the probability that two Russian sentences are paraphrases of each other. """
62
+ with torch.inference_mode():
63
+ batch = tokenizer(
64
+ text1, text2,
65
+ truncation=True, max_length=model.config.max_position_embeddings, return_tensors='pt',
66
+ ).to(model.device)
67
+ proba = torch.softmax(model(**batch).logits, -1)
68
+ return proba[0][1].item()
69
+
70
+ print(get_similarity('Я тебя люблю', 'Ты мне нравишься')) # 0.9798
71
+ print(get_similarity('Я тебя люблю', 'Я тебя ненавижу')) # 0.0008
72
+ ```