File size: 1,128 Bytes
4a1b022
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import pandas as pd
import torch
import numpy as np

from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

# Single identifier shared by the classifier weights and their tokenizer,
# so the two cannot drift apart.
checkpoint_name = "deberta-classification-chatrag/checkpoint-6342"

model = AutoModelForSequenceClassification.from_pretrained(checkpoint_name)
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)


# Example queries to score; they end up in the "query" column of `result`.
sample_queries = [
    "Comment puis-je renouveler un passeport ?",
    "Combien font deux et deux ?",
    "Écris un début de lettre de recommandation pour la Dinum",
]

result = pd.DataFrame({"query": sample_queries})

# Score each query with the classifier and collect one probability per query,
# in the same order as the rows of `result`.
complete_probabilities = []

# Inference only: disabling autograd avoids building a computation graph for
# every forward pass, which the script never uses.
with torch.no_grad():
    for text in result["query"].tolist():
        encoding = tokenizer(text, return_tensors="pt")
        # Move the input tensors to whichever device the model lives on.
        encoding = {k: v.to(model.device) for k, v in encoding.items()}

        logits = model(**encoding).logits

        # Sigmoid maps the raw logit to a probability in (0, 1).
        # `.item()` only works on a single-element tensor, so this assumes
        # the model emits exactly one logit per query -- TODO confirm
        # against the checkpoint's `num_labels`.
        prob = torch.sigmoid(logits.squeeze().cpu())
        complete_probabilities.append(prob.item())

result["prob"] = complete_probabilities

print(result)