# ONNX export script for the ModernBERT token-classification CRF model.
import torch
from transformers import AutoTokenizer
from modeling_modernbert_crf import ModernBertForTokenClassificationCRF
class DisableCompileContextManager:
    """Temporarily turn ``torch.compile`` into a no-op.

    ONNX export traces the eager model, and any ``torch.compile``
    wrapping applied inside the model code interferes with tracing.
    While this context is active, ``torch.compile`` simply hands back
    the function/module it was given; the real implementation is
    restored on exit, even if an exception was raised inside the block.

    Bug fixed: the previous stub ``lambda *args, **kwargs: lambda x: x``
    mishandled the direct-call form — ``torch.compile(model)`` returned
    the identity *lambda* instead of ``model``, so code doing
    ``model = torch.compile(model)`` would silently lose the model.
    """

    def __init__(self):
        # Capture the real torch.compile so __exit__ can restore it.
        self._original_compile = torch.compile

    def __enter__(self):
        def _noop_compile(*args, **kwargs):
            # Direct form: torch.compile(fn_or_module, ...) -> return it unchanged.
            if args and callable(args[0]):
                return args[0]
            # Decorator-factory form: torch.compile(**opts) -> identity decorator.
            return lambda fn: fn

        torch.compile = _noop_compile
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Always restore the original, regardless of exceptions.
        torch.compile = self._original_compile
def export():
    """Trace the ModernBERT-CRF emission head and save it as ONNX.

    Loads the tokenizer from the public base checkpoint and the
    fine-tuned weights from ``./modernbert-ner-crf-hf``, runs a single
    dummy sentence through the tokenizer to provide trace inputs, and
    writes ``modernbert_crf_emissions.onnx`` with dynamic batch and
    sequence axes (opset 14, CPU, eager attention).
    """
    with DisableCompileContextManager():
        # Tokenizer comes from the base checkpoint; weights from the local fine-tune.
        tokenizer = AutoTokenizer.from_pretrained(
            "answerdotai/ModernBERT-base", model_max_length=4096
        )
        model = ModernBertForTokenClassificationCRF.from_pretrained(
            "./modernbert-ner-crf-hf", attn_implementation="eager"
        )
        model.eval()
        model = model.to('cpu')

        # One dummy sentence is enough to drive the trace.
        encoded = tokenizer(
            ["Hello, this is a test sentence."],
            return_tensors='pt',
            max_length=128,
            padding='max_length',
            truncation=True,
        )

        with torch.no_grad():
            torch.onnx.export(
                model,
                (encoded["input_ids"], encoded["attention_mask"]),
                "modernbert_crf_emissions.onnx",
                input_names=["input_ids", "attention_mask"],
                output_names=["logits"],
                opset_version=14,
                # Batch and sequence dimensions stay symbolic in the graph.
                dynamic_axes={
                    "input_ids": {0: "batch_size", 1: "seq_length"},
                    "attention_mask": {0: "batch_size", 1: "seq_length"},
                    "logits": {0: "batch_size", 1: "seq_length"},
                },
            )
        print("✅ Exported model to modernbert_crf_emissions.onnx")
# Run the export only when invoked as a script, not on import.
if __name__ == '__main__':
    export()