from functools import partial
from optimum.intel.openvino import OVQuantizer, OVModelForTokenClassification
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Candidate NER checkpoints fine-tuned on CoNLL-2003. The single uncommented
# assignment selects the active model; the first assignment was previously
# live but immediately overwritten, i.e. dead code — kept here as a
# commented alternative to preserve the toggle list.
# model_id = "elastic/distilbert-base-uncased-finetuned-conll03-english"
# model_id = "xlm-roberta-large-finetuned-conll03-english"
model_id = "dbmdz/bert-large-cased-finetuned-conll03-english"
# Load the PyTorch model and its matching tokenizer from the Hub.
model = AutoModelForTokenClassification.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
# tokenizer.pad_token_id=0

def preprocess_fn(examples, tokenizer):
    """Tokenize a batch of pre-split word lists into fixed-length model inputs.

    Each example's "tokens" field is a list of words (already split), so the
    tokenizer is told via ``is_split_into_words``; every sequence is padded
    or truncated to exactly 128 tokens.
    """
    tokenize_kwargs = {
        "padding": "max_length",
        "max_length": 128,
        "truncation": True,
        "is_split_into_words": True,
    }
    return tokenizer(examples["tokens"], **tokenize_kwargs)

# Wrap the loaded PyTorch model in an OpenVINO quantizer.
quantizer = OVQuantizer.from_pretrained(model)
# Build a 300-sample calibration set from the CoNLL-2003 validation split,
# tokenized batch-wise with the tokenizer bound via functools.partial.
calibration_dataset = quantizer.get_calibration_dataset(
    "conll2003",
    preprocess_function=partial(preprocess_fn, tokenizer=tokenizer),
    num_samples=300,
    dataset_split="validation",
    preprocess_batch=True,
)
# The directory where the quantized model will be saved.
# NOTE(review): model_id is of the form "org/name", so save_dir contains a
# slash and resolves to a nested path (e.g. "dbmdz/bert-..._ov_int8") —
# confirm the quantizer creates intermediate directories.
save_dir = f"{model_id}_ov_int8"
# Apply static quantization and save the resulting model in the OpenVINO IR format
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory=save_dir)
# Reload the quantized IR model for inference via the OpenVINO runtime wrapper.
optimized_model = OVModelForTokenClassification.from_pretrained(save_dir)