Emanuela Boros
committed on
Commit
•
3df5bff
1
Parent(s):
8cda137
added confidence
Browse files - generic_nel.py +22 -10
generic_nel.py
CHANGED
@@ -120,16 +120,28 @@ class NelPipeline(Pipeline):
|
|
120 |
wikipedia_predictions = self.tokenizer.batch_decode(
|
121 |
outputs.sequences, skip_special_tokens=True
|
122 |
)
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
# Return the predictions along with the extracted entity, lOffset, and rOffset
|
135 |
return wikipedia_predictions, enclosed_entity, lOffset, rOffset, percentages
|
|
|
120 |
wikipedia_predictions = self.tokenizer.batch_decode(
|
121 |
outputs.sequences, skip_special_tokens=True
|
122 |
)
|
123 |
+
# Initialize confidence list for each token
|
124 |
+
token_confidences = []
|
125 |
+
|
126 |
+
# Process the scores for each token
|
127 |
+
all_probabilities = []
|
128 |
+
import torch.nn.functional as F
|
129 |
+
|
130 |
+
# Process each score (logits for the generated tokens)
|
131 |
+
for i, score in enumerate(scores):
|
132 |
+
# Apply softmax to convert logits into probabilities
|
133 |
+
probabilities = F.softmax(score, dim=-1)
|
134 |
+
|
135 |
+
# Get the probabilities for the top tokens
|
136 |
+
top_probabilities = (
|
137 |
+
probabilities.cpu().numpy()
|
138 |
+
) # Move to CPU and convert to NumPy
|
139 |
+
|
140 |
+
# Store the probabilities
|
141 |
+
all_probabilities.append(top_probabilities)
|
142 |
+
|
143 |
+
# Convert probabilities into percentages if needed
|
144 |
+
percentages = [(prob * 100.0).tolist() for prob in all_probabilities]
|
145 |
|
146 |
# Return the predictions along with the extracted entity, lOffset, and rOffset
|
147 |
return wikipedia_predictions, enclosed_entity, lOffset, rOffset, percentages
|