End of training
Browse files
- ConSec.py +22 -11
- README.md +11 -11
- model.safetensors +1 -1
ConSec.py
CHANGED
|
@@ -128,7 +128,7 @@ class ConSecTagger:
|
|
| 128 |
labeller=json_labeller):
|
| 129 |
self.model = model
|
| 130 |
self.tokenizer = tokenizer
|
| 131 |
-
special_tokens = self.tokenizer.
|
| 132 |
self.start_token = special_tokens["[START]"]
|
| 133 |
self.gloss_token = special_tokens["[GLOSS]"]
|
| 134 |
self.sequencer = sequencer
|
|
@@ -148,27 +148,38 @@ class ConSecTagger:
|
|
| 148 |
with self.model.device:
|
| 149 |
tokens = self.tokenizer(text,"[GLOSS] ".join(glosses),
|
| 150 |
return_tensors="pt")
|
| 151 |
-
length = tokens.input_ids.shape[
|
| 152 |
positions = torch.arange(length)
|
| 153 |
-
place = (tokens.input_ids==self.start_token).nonzero(as_tuple=True)[
|
| 154 |
-
wordpos = tokens.token_to_word(place
|
| 155 |
-
gloss_positions =
|
|
|
|
| 156 |
gloss_positions.append(length)
|
| 157 |
n_candidates = len(candidates)
|
| 158 |
for (i,position) in enumerate(gloss_positions[:-1]):
|
| 159 |
if i<n_candidates:
|
| 160 |
-
end = place + gloss_positions[i+1]-position
|
| 161 |
-
positions
|
| 162 |
else:
|
| 163 |
known = already_tagged[i-n_candidates]
|
| 164 |
start = tokens.word_to_tokens(known["place"]).start
|
| 165 |
-
end = start + gloss_positions[i+1] - position
|
| 166 |
positions[position:gloss_positions[i+1]] = torch.arange(start,end)
|
| 167 |
-
prediction = self.model(
|
| 168 |
attention_mask=tokens.attention_mask,
|
| 169 |
token_type_ids=tokens.token_type_ids,
|
| 170 |
-
position_ids=positions)
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
already_tagged.append({"label":label,
|
| 173 |
"place":wordpos,
|
| 174 |
"index":index})
|
|
|
|
| 128 |
labeller=json_labeller):
|
| 129 |
self.model = model
|
| 130 |
self.tokenizer = tokenizer
|
| 131 |
+
special_tokens = self.tokenizer.get_added_vocab()
|
| 132 |
self.start_token = special_tokens["[START]"]
|
| 133 |
self.gloss_token = special_tokens["[GLOSS]"]
|
| 134 |
self.sequencer = sequencer
|
|
|
|
| 148 |
with self.model.device:
|
| 149 |
tokens = self.tokenizer(text,"[GLOSS] ".join(glosses),
|
| 150 |
return_tensors="pt")
|
| 151 |
+
length = tokens.input_ids.shape[1]
|
| 152 |
positions = torch.arange(length)
|
| 153 |
+
place = (tokens.input_ids==self.start_token).nonzero(as_tuple=True)[1].item()
|
| 154 |
+
wordpos = tokens.token_to_word(place)
|
| 155 |
+
gloss_positions = [index.item()
|
| 156 |
+
for index in (tokens.input_ids==self.gloss_token).nonzero(as_tuple=True)[1]]
|
| 157 |
gloss_positions.append(length)
|
| 158 |
n_candidates = len(candidates)
|
| 159 |
for (i,position) in enumerate(gloss_positions[:-1]):
|
| 160 |
if i<n_candidates:
|
| 161 |
+
end = (place + gloss_positions[i+1]-position)
|
| 162 |
+
positions[position:gloss_positions[i+1]] = torch.arange(place,end)
|
| 163 |
else:
|
| 164 |
known = already_tagged[i-n_candidates]
|
| 165 |
start = tokens.word_to_tokens(known["place"]).start
|
| 166 |
+
end = (start + gloss_positions[i+1] - position)
|
| 167 |
positions[position:gloss_positions[i+1]] = torch.arange(start,end)
|
| 168 |
+
prediction = self.model(input_ids=tokens.input_ids,
|
| 169 |
attention_mask=tokens.attention_mask,
|
| 170 |
token_type_ids=tokens.token_type_ids,
|
| 171 |
+
position_ids=positions.reshape((1,length)))
|
| 172 |
+
try:
|
| 173 |
+
label = candidates[prediction.logits.argmax()]
|
| 174 |
+
except IndexError:
|
| 175 |
+
print(text)
|
| 176 |
+
print(gloss_positions)
|
| 177 |
+
print([positions[pos].item() for pos in gloss_positions[:-1]])
|
| 178 |
+
print(already_tagged)
|
| 179 |
+
print(candidates)
|
| 180 |
+
print(prediction.logits)
|
| 181 |
+
print(prediction.logits.argmax())
|
| 182 |
+
raise
|
| 183 |
already_tagged.append({"label":label,
|
| 184 |
"place":wordpos,
|
| 185 |
"index":index})
|
README.md
CHANGED
|
@@ -22,11 +22,11 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 22 |
|
| 23 |
This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the None dataset.
|
| 24 |
It achieves the following results on the evaluation set:
|
| 25 |
-
- Loss:
|
| 26 |
-
- Precision: 0.
|
| 27 |
-
- Recall: 0.
|
| 28 |
-
- F1: 0.
|
| 29 |
-
- Matthews: 0.
|
| 30 |
|
| 31 |
## Model description
|
| 32 |
|
|
@@ -58,12 +58,12 @@ The following hyperparameters were used during training:
|
|
| 58 |
|
| 59 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Matthews |
|
| 60 |
|:-------------:|:-----:|:------:|:---------------:|:---------:|:------:|:------:|:--------:|
|
| 61 |
-
| No log | 0 | 0 |
|
| 62 |
-
|
|
| 63 |
-
|
|
| 64 |
-
|
|
| 65 |
-
|
|
| 66 |
-
|
|
| 67 |
|
| 68 |
|
| 69 |
### Framework versions
|
|
|
|
| 22 |
|
| 23 |
This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on the None dataset.
|
| 24 |
It achieves the following results on the evaluation set:
|
| 25 |
+
- Loss: 1.5775
|
| 26 |
+
- Precision: 0.4804
|
| 27 |
+
- Recall: 0.4917
|
| 28 |
+
- F1: 0.4860
|
| 29 |
+
- Matthews: 0.4909
|
| 30 |
|
| 31 |
## Model description
|
| 32 |
|
|
|
|
| 58 |
|
| 59 |
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Matthews |
|
| 60 |
|:-------------:|:-----:|:------:|:---------------:|:---------:|:------:|:------:|:--------:|
|
| 61 |
+
| No log | 0 | 0 | 344.1697 | 0.4603 | 0.3243 | 0.3805 | 0.3236 |
|
| 62 |
+
| 6.7210 | 1.0 | 56179 | 1.5766 | 0.4804 | 0.4917 | 0.4860 | 0.4909 |
|
| 63 |
+
| 5.7990 | 2.0 | 112358 | 1.5649 | 0.4859 | 0.4943 | 0.4900 | 0.4935 |
|
| 64 |
+
| 6.3812 | 3.0 | 168537 | 1.5669 | 0.4804 | 0.4926 | 0.4864 | 0.4918 |
|
| 65 |
+
| 5.8106 | 4.0 | 224716 | 1.5847 | 0.4834 | 0.4921 | 0.4877 | 0.4913 |
|
| 66 |
+
| 6.0390 | 5.0 | 280895 | 1.5775 | 0.4804 | 0.4917 | 0.4860 | 0.4909 |
|
| 67 |
|
| 68 |
|
| 69 |
### Framework versions
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 367690992
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc8e2038fc80452fdf46c78fad96e984da050257f1fed05392bbbb4511d9f4a1
|
| 3 |
size 367690992
|