vamsibanda committed
Commit a286e00 (parent: b2dfc40)

Update README.md

Files changed (1): README.md +98 -0

---
license: apache-2.0
---

## Convert the PyTorch model to ONNX format

```python
import torch
import onnx
import onnxruntime
from onnxruntime import InferenceSession
from transformers import RobertaTokenizer, RobertaModel
from transformers.convert_graph_to_onnx import convert
import numpy as np
from onnxruntime.transformers import optimizer
from pathlib import Path
from onnxruntime.quantization import quantize_dynamic, QuantType
from sentence_transformers import SentenceTransformer, util

# Save the original Sentence-Transformers model; this provides the pooling config
# and a reference encoder to compare against later.
sbert = SentenceTransformer('sentence-transformers/all-roberta-large-v1')
sbert.save('sbert-all-roberta-large-v1')

# Save the underlying RoBERTa model and tokenizer in Hugging Face format for the ONNX export.
tokenizer = RobertaTokenizer.from_pretrained('sentence-transformers/all-roberta-large-v1')
model = RobertaModel.from_pretrained('sentence-transformers/all-roberta-large-v1')
model.save_pretrained('./all-roberta-large-v1/')
tokenizer.save_pretrained('./all-roberta-large-v1/')

# Export the transformer to ONNX with the feature-extraction pipeline (opset 12).
opt_model_path = "onnx-model/sbert-roberta-large.onnx"
convert(framework='pt', model='./all-roberta-large-v1/', output=Path(opt_model_path),
        opset=12, use_external_format=False, pipeline_name='feature-extraction')

# Dynamically quantize the exported graph to 8-bit to shrink it and speed up CPU inference.
quantize_dynamic(
    model_input='onnx-model/sbert-roberta-large.onnx',
    model_output='onnx-model/sbert-roberta-large-quant.onnx',
    per_channel=True,
    reduce_range=True,
    activation_type=QuantType.QUInt8,
    weight_type=QuantType.QInt8,
    optimize_model=False,
    use_external_data_format=False
)
```
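
Optionally, a quick sanity check can be run on the quantized file at this point (a sketch, not part of the original steps; it only validates the graph and prints the model's input names):

```python
# Sanity-check sketch: validate the quantized graph and list its input names.
import onnx
from onnxruntime import InferenceSession

onnx.checker.check_model('onnx-model/sbert-roberta-large-quant.onnx')
session = InferenceSession('onnx-model/sbert-roberta-large-quant.onnx')
print([inp.name for inp in session.get_inputs()])  # e.g. ['input_ids', 'attention_mask'] (exact names depend on the export)
```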

## Copy the pooling layer and tokenizer files to the output directory
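
A minimal sketch of this step is shown below, assuming an output directory named `sbert-onnx-all-roberta-large-v1` (the same path the inference code below reads from) and the folders saved in the conversion step above.

```python
# Copy-step sketch: the directory layout here is an assumption that matches the
# paths used in the inference snippet below.
import shutil
from pathlib import Path

out_dir = Path('sbert-onnx-all-roberta-large-v1')
out_dir.mkdir(exist_ok=True)

# Quantized ONNX graph produced above
shutil.copy('onnx-model/sbert-roberta-large-quant.onnx', out_dir / 'sbert-roberta-large-quant.onnx')

# Pooling configuration saved by SentenceTransformer.save()
shutil.copytree('sbert-all-roberta-large-v1/1_Pooling', out_dir / '1_Pooling', dirs_exist_ok=True)

# Tokenizer files, reusing the tokenizer object from the conversion step
tokenizer.save_pretrained(out_dir)
```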

## How to generate embeddings?

```python
from onnxruntime import InferenceSession
import torch
from transformers.modeling_outputs import BaseModelOutput
from transformers import RobertaTokenizerFast
import torch.nn.functional as F
from sentence_transformers.models import Transformer, Pooling, Dense

class RobertaEncoder(torch.nn.Module):
    """Wraps an onnxruntime InferenceSession so it can be called like a Hugging Face encoder."""

    def __init__(self, encoder_sess):
        super().__init__()
        self.encoder = encoder_sess

    def forward(
        self,
        input_ids,
        attention_mask,
        inputs_embeds=None,
        head_mask=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        # Run the ONNX session and wrap the last hidden state in a BaseModelOutput.
        encoder_hidden_state = torch.from_numpy(
            self.encoder.run(
                None,
                {
                    "input_ids": input_ids.cpu().numpy(),
                    "attention_mask": attention_mask.cpu().numpy(),
                },
            )[0]
        )
        return BaseModelOutput(encoder_hidden_state)

def mean_pooling(model_output, attention_mask):
    # First element of model_output contains all token embeddings.
    token_embeddings = model_output[0]
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

def sbert_onnx_encode(sentence_input):
    # Tokenize, run the ONNX encoder, mean-pool over tokens, then L2-normalize.
    token = roberta_tokenizer(sentence_input, return_tensors='pt')
    encoder_outputs = encoder_layer(input_ids=token['input_ids'], attention_mask=token['attention_mask'])
    sbert_embeddings = mean_pooling(encoder_outputs, token['attention_mask'])
    sbert_embeddings = F.normalize(sbert_embeddings, p=2, dim=1)
    return sbert_embeddings.tolist()[0]

roberta_tokenizer = RobertaTokenizerFast.from_pretrained('sbert-onnx-all-roberta-large-v1')
encoder_sess = InferenceSession('sbert-onnx-all-roberta-large-v1/sbert-roberta-large-quant.onnx')
encoder_layer = RobertaEncoder(encoder_sess)
# Pooling config copied into 1_Pooling (mean pooling is applied manually above).
pooling_layer = Pooling.load('./sbert-onnx-all-roberta-large-v1/1_Pooling/')

# Compare the ONNX embedding against the original SentenceTransformer
# ('sbert' and 'util' come from the conversion step above).
m1 = sbert_onnx_encode('That is a happy person')
m2 = sbert.encode('That is a happy person').tolist()
print(util.cos_sim(m1, m2))
# tensor([[0.9925]])
```
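
As a small usage example (a sketch beyond the original steps), the same helper can encode several sentences and compare them pairwise:

```python
# Encode a few sentences with the ONNX encoder and print their pairwise cosine
# similarities ('util' is the sentence_transformers helper imported above).
sentences = ['That is a happy person', 'That is a very happy person', 'Today is a sunny day']
onnx_embeddings = [sbert_onnx_encode(s) for s in sentences]
print(util.cos_sim(onnx_embeddings, onnx_embeddings))  # 3x3 similarity matrix
```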