End of training
Browse files- DisamBertCrossEncoder.py +125 -0
- README.md +19 -19
- config.json +3 -0
- model.safetensors +1 -1
- tokenizer.json +8 -1
DisamBertCrossEncoder.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections.abc import Generator, Iterable
|
| 2 |
+
from dataclasses import dataclass
|
| 3 |
+
from enum import StrEnum
|
| 4 |
+
from itertools import chain
|
| 5 |
+
|
| 6 |
+
from nltk.corpus import wordnet
|
| 7 |
+
from nltk.metrics import edit_distance
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
+
import torch
|
| 11 |
+
import torch.nn as nn
|
| 12 |
+
from transformers import (
|
| 13 |
+
AutoConfig,
|
| 14 |
+
AutoModel,
|
| 15 |
+
AutoTokenizer,
|
| 16 |
+
ModernBertModel,
|
| 17 |
+
PreTrainedConfig,
|
| 18 |
+
PreTrainedModel,
|
| 19 |
+
)
|
| 20 |
+
from transformers.modeling_outputs import SequenceClassifierOutput
|
| 21 |
+
|
| 22 |
+
# Default batch size for scoring (sentence, gloss) pairs.
# NOTE(review): defined but not referenced anywhere in this file — presumably
# consumed by training/inference code elsewhere; confirm before removing.
BATCH_SIZE = 16
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ModelURI(StrEnum):
    """Hugging Face Hub identifiers of the ModernBERT checkpoints usable as backbones."""

    BASE = "answerdotai/ModernBERT-base"
    LARGE = "answerdotai/ModernBERT-large"
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass(slots=True, frozen=True)
class LexicalExample:
    """Immutable (concept, definition) pair — one lexical sense example."""

    # Surface form / lemma of the target concept.
    concept: str
    # Gloss (dictionary definition) text for that concept.
    definition: str
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass(slots=True, frozen=True)
class PaddedBatch:
    """Immutable container for one padded tokenizer batch ready for the encoder."""

    # Token ids, padded to a common length within the batch.
    input_ids: torch.Tensor
    # 1 for real tokens, 0 for padding positions.
    attention_mask: torch.Tensor
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class DisamBertCrossEncoder(PreTrainedModel):
    """Cross-encoder for word-sense disambiguation on a ModernBERT backbone.

    Each input sequence encodes a (context, gloss) pair; the model emits one
    scalar logit per sequence: the dot product between the [CLS] hidden state
    and the hidden state of the sequence's first [SEP] token.
    """

    def __init__(self, config: PreTrainedConfig):
        super().__init__(config)
        if config.init_basemodel:
            # First construction from a base checkpoint: download pretrained
            # backbone weights from the Hub.
            self.BaseModel = AutoModel.from_pretrained(config.name_or_path, device_map="auto")
        else:
            # Reload path: build an empty backbone; weights arrive via the
            # normal from_pretrained state_dict loading.
            self.BaseModel = ModernBertModel(config)
        # Flip the flag so a re-saved config never re-downloads the backbone.
        config.init_basemodel = False
        # Binary relevance objective on the raw dot-product logit.
        # NOTE(review): BCEWithLogitsLoss expects float labels in [0, 1] —
        # confirm the training collator passes float, not long, labels.
        self.loss = nn.BCEWithLogitsLoss()
        self.post_init()

    @classmethod
    def from_base(cls, base_id: ModelURI):
        """Alternate constructor: start a new model from a pretrained ModernBERT id."""
        config = AutoConfig.from_pretrained(base_id)
        config.init_basemodel = True
        config.tokenizer_path = base_id
        return cls(config)

    def forward(
        self,
        input_ids: torch.Tensor,
        attention_mask: torch.Tensor,
        labels: torch.Tensor | None = None,
        output_hidden_states: bool = False,
        output_attentions: bool = False,
    ) -> SequenceClassifierOutput:
        """Score each (context, gloss) sequence in the batch with one logit.

        Returns a SequenceClassifierOutput with logits of shape (batch,) and,
        when labels are given, the BCE-with-logits loss.
        """
        base_model_output = self.BaseModel(
            input_ids,
            attention_mask,
            output_hidden_states=output_hidden_states,
            output_attentions=output_attentions,
        )
        token_vectors = base_model_output.last_hidden_state
        # Collect the position of the FIRST sep_token_id in every row:
        # nonzero() yields (row, col) pairs in row-major order, so the first
        # hit for a new row index is that row's earliest [SEP].
        prev = -1
        rows = []
        cols = []
        for (i,j) in (input_ids == self.config.sep_token_id).nonzero():
            if i!=prev:
                rows.append(i)
                cols.append(j)
                prev=i
        # Hidden state of each row's first [SEP] — treated as the gloss vector.
        # NOTE(review): rows with no [SEP] are silently skipped, which would
        # misalign logits with the batch — assumes every sequence has one.
        gloss_vectors = token_vectors[rows,cols]
        # Per-row dot product between [CLS] (position 0) and the gloss vector.
        logits = torch.einsum("ij,ij->i",token_vectors[:,0],gloss_vectors)
        return SequenceClassifierOutput(
            logits=logits,
            loss=self.loss(logits, labels) if labels is not None else None,
            hidden_states=base_model_output.hidden_states if output_hidden_states else None,
            attentions=base_model_output.attentions if output_attentions else None,
        )
|
| 91 |
+
|
| 92 |
+
def get_lemma(text: str, synset: wordnet.synset) -> wordnet.lemma:
    """Return the lemma of *synset* whose name is closest to *text* by edit distance.

    Ties keep the earliest lemma in synset order (same as the original scan).
    Returns None when the synset has no lemmas.
    """
    # min() with a key replaces the manual best-score scan and its arbitrary
    # 1_000_000 sentinel; default=None preserves the empty-lemmas result.
    return min(
        synset.lemmas(),
        key=lambda lemma: edit_distance(text, lemma.name()),
        default=None,
    )
|
| 101 |
+
|
| 102 |
+
class CrossEncoderTagger:
    """Word-sense tagger that ranks WordNet senses with a DisamBertCrossEncoder checkpoint."""

    def __init__(self, url: str):
        # trust_remote_code: the model class (DisamBertCrossEncoder) ships with
        # the checkpoint via config.json's auto_map.
        self.model = AutoModel.from_pretrained(url,
                                               device_map="auto",
                                               trust_remote_code=True)
        self.tokenizer = AutoTokenizer.from_pretrained(url)

    def __call__(self, target: str, sentence: str, candidates: list[str]) -> str:
        """Return the candidate synset name whose gloss scores highest for *target* in *sentence*.

        candidates are WordNet synset names, e.g. "dog.n.01".
        """
        text = f"{target}::{sentence}"
        synsets = [wordnet.synset(candidate) for candidate in candidates]
        # NOTE(review): this interpolates the Lemma object's repr (e.g.
        # "Lemma('dog.n.01.dog')"), not lemma.name() — confirm it matches the
        # format the model was trained on before changing it.
        definitions = [f"{get_lemma(target, synset)}::{synset.definition()}"
                       for synset in synsets]
        # Pair the same context sentence with every candidate gloss.
        sentences = [text] * len(candidates)
        # torch.device context: tensors created inside default to the model's device.
        with self.model.device:
            tokens = self.tokenizer(sentences, definitions, padding=True, return_tensors="pt")
            with torch.no_grad():  # inference only — skip autograd bookkeeping
                output = self.model(tokens.input_ids,
                                    tokens.attention_mask)
        logits = output.logits
        # 0-d argmax tensor supports __index__, so list indexing is valid.
        return candidates[logits.argmax()]
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
|
README.md
CHANGED
|
@@ -24,12 +24,12 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 24 |
|
| 25 |
This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the None dataset.
|
| 26 |
It achieves the following results on the evaluation set:
|
| 27 |
-
- Loss: 0.
|
| 28 |
-
- Precision: 0.
|
| 29 |
-
- Recall: 0.
|
| 30 |
-
- F1: 0.
|
| 31 |
-
- Accuracy: 0.
|
| 32 |
-
- Matthews Correlation: 0.
|
| 33 |
|
| 34 |
## Model description
|
| 35 |
|
|
@@ -60,19 +60,19 @@ The following hyperparameters were used during training:
|
|
| 60 |
|
| 61 |
### Training results
|
| 62 |
|
| 63 |
-
| Training Loss | Epoch | Step
|
| 64 |
-
|:-------------:|:-----:|:-----
|
| 65 |
-
| No log | 0 | 0
|
| 66 |
-
| 0.
|
| 67 |
-
| 0.
|
| 68 |
-
| 0.
|
| 69 |
-
| 0.
|
| 70 |
-
| 0.
|
| 71 |
-
| 0.
|
| 72 |
-
| 0.
|
| 73 |
-
| 0.
|
| 74 |
-
| 0.
|
| 75 |
-
| 0.
|
| 76 |
|
| 77 |
|
| 78 |
### Framework versions
|
|
|
|
| 24 |
|
| 25 |
This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the None dataset.
|
| 26 |
It achieves the following results on the evaluation set:
|
| 27 |
+
- Loss: 0.3160
|
| 28 |
+
- Precision: 0.6783
|
| 29 |
+
- Recall: 0.5978
|
| 30 |
+
- F1: 0.6355
|
| 31 |
+
- Accuracy: 0.9378
|
| 32 |
+
- Matthews Correlation: 0.6031
|
| 33 |
|
| 34 |
## Model description
|
| 35 |
|
|
|
|
| 60 |
|
| 61 |
### Training results
|
| 62 |
|
| 63 |
+
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy | Matthews Correlation |
|
| 64 |
+
|:-------------:|:-----:|:-----:|:---------------:|:---------:|:------:|:------:|:--------:|:--------------------:|
|
| 65 |
+
| No log | 0 | 0 | 1123.2456 | 0.0907 | 1.0 | 0.1663 | 0.0909 | 0.0045 |
|
| 66 |
+
| 0.1943 | 1.0 | 9050 | 0.1832 | 0.7346 | 0.2615 | 0.3857 | 0.9245 | 0.4096 |
|
| 67 |
+
| 0.1500 | 2.0 | 18100 | 0.1551 | 0.7019 | 0.4967 | 0.5817 | 0.9352 | 0.5574 |
|
| 68 |
+
| 0.1242 | 3.0 | 27150 | 0.1481 | 0.7381 | 0.5451 | 0.6271 | 0.9412 | 0.6040 |
|
| 69 |
+
| 0.1017 | 4.0 | 36200 | 0.1482 | 0.7413 | 0.5604 | 0.6383 | 0.9424 | 0.6147 |
|
| 70 |
+
| 0.0774 | 5.0 | 45250 | 0.1564 | 0.7179 | 0.6154 | 0.6627 | 0.9432 | 0.6342 |
|
| 71 |
+
| 0.0610 | 6.0 | 54300 | 0.1859 | 0.7579 | 0.5297 | 0.6235 | 0.9420 | 0.6044 |
|
| 72 |
+
| 0.0434 | 7.0 | 63350 | 0.2016 | 0.6754 | 0.6264 | 0.6499 | 0.9388 | 0.6170 |
|
| 73 |
+
| 0.0298 | 8.0 | 72400 | 0.2480 | 0.6520 | 0.6505 | 0.6513 | 0.9368 | 0.6165 |
|
| 74 |
+
| 0.0216 | 9.0 | 81450 | 0.2961 | 0.6819 | 0.5890 | 0.6321 | 0.9378 | 0.6002 |
|
| 75 |
+
| 0.0174 | 10.0 | 90500 | 0.3160 | 0.6783 | 0.5978 | 0.6355 | 0.9378 | 0.6031 |
|
| 76 |
|
| 77 |
|
| 78 |
### Framework versions
|
config.json
CHANGED
|
@@ -4,6 +4,9 @@
|
|
| 4 |
],
|
| 5 |
"attention_bias": false,
|
| 6 |
"attention_dropout": 0.0,
|
|
|
|
|
|
|
|
|
|
| 7 |
"bos_token_id": null,
|
| 8 |
"classifier_activation": "gelu",
|
| 9 |
"classifier_bias": false,
|
|
|
|
| 4 |
],
|
| 5 |
"attention_bias": false,
|
| 6 |
"attention_dropout": 0.0,
|
| 7 |
+
"auto_map": {
|
| 8 |
+
"AutoModel": "DisamBertCrossEncoder.DisamBertCrossEncoder"
|
| 9 |
+
},
|
| 10 |
"bos_token_id": null,
|
| 11 |
"classifier_activation": "gelu",
|
| 12 |
"classifier_bias": false,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 596071480
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cb625d94dadd5a1929c852bb4728f74906eab0e8898e2300353ddaed125bb08
|
| 3 |
size 596071480
|
tokenizer.json
CHANGED
|
@@ -1,7 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"version": "1.0",
|
| 3 |
"truncation": null,
|
| 4 |
-
"padding":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"added_tokens": [
|
| 6 |
{
|
| 7 |
"id": 0,
|
|
|
|
| 1 |
{
|
| 2 |
"version": "1.0",
|
| 3 |
"truncation": null,
|
| 4 |
+
"padding": {
|
| 5 |
+
"strategy": "BatchLongest",
|
| 6 |
+
"direction": "Right",
|
| 7 |
+
"pad_to_multiple_of": null,
|
| 8 |
+
"pad_id": 50283,
|
| 9 |
+
"pad_type_id": 0,
|
| 10 |
+
"pad_token": "[PAD]"
|
| 11 |
+
},
|
| 12 |
"added_tokens": [
|
| 13 |
{
|
| 14 |
"id": 0,
|