andreylitvinov committed on
Commit
6ccb51f
1 Parent(s): 536c182
Files changed (9) hide show
  1. .gitattributes +5 -1
  2. README.md +9 -17
  3. config.json +15 -158
  4. handler copy.py +0 -51
  5. handler.py +26 -5
  6. pytorch_model.bin +3 -0
  7. requirements.txt +1 -9
  8. tokenizer.json +0 -0
  9. tokenizer_config.json +1 -1
.gitattributes CHANGED
@@ -9,10 +9,14 @@
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
12
  *.onnx filter=lfs diff=lfs merge=lfs -text
13
  *.ot filter=lfs diff=lfs merge=lfs -text
14
  *.parquet filter=lfs diff=lfs merge=lfs -text
15
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
16
  *.pt filter=lfs diff=lfs merge=lfs -text
17
  *.pth filter=lfs diff=lfs merge=lfs -text
18
  *.rar filter=lfs diff=lfs merge=lfs -text
@@ -23,5 +27,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
23
  *.wasm filter=lfs diff=lfs merge=lfs -text
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
- *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
  *.onnx filter=lfs diff=lfs merge=lfs -text
15
  *.ot filter=lfs diff=lfs merge=lfs -text
16
  *.parquet filter=lfs diff=lfs merge=lfs -text
17
  *.pb filter=lfs diff=lfs merge=lfs -text
18
+ *.pickle filter=lfs diff=lfs merge=lfs -text
19
+ *.pkl filter=lfs diff=lfs merge=lfs -text
20
  *.pt filter=lfs diff=lfs merge=lfs -text
21
  *.pth filter=lfs diff=lfs merge=lfs -text
22
  *.rar filter=lfs diff=lfs merge=lfs -text
 
27
  *.wasm filter=lfs diff=lfs merge=lfs -text
28
  *.xz filter=lfs diff=lfs merge=lfs -text
29
  *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
  *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,24 +1,16 @@
1
  ---
 
 
2
  tags:
3
  - text-classification
 
4
  - endpoints-template
5
- - optimum
6
- library_name: generic
 
 
 
7
  ---
8
 
9
- # Optimized and Quantized DistilBERT with a custom pipeline with handler.py
10
 
11
- > NOTE: Blog post coming soon
12
-
13
- This is a template repository for Text Classification using Optimum and onnxruntime to support generic inference with Hugging Face Hub generic Inference API. There are two required steps:
14
-
15
- 1. Specify the requirements by defining a `requirements.txt` file.
16
- 2. Implement the `handler.py` `__init__` and `__call__` methods. These methods are called by the Inference API. The `__init__` method should load the model and preload the optimum model and tokenizers as well as the `text-classification` pipeline needed for inference. This is only called once. The `__call__` method performs the actual inference. Make sure to follow the same input/output specifications defined in the template for the pipeline to work.
17
-
18
- add
19
- ```
20
- library_name: generic
21
- ```
22
- to the readme.
23
-
24
- _Note: the `generic` community image currently supports only `inputs` as a parameter; no other parameters are supported._
 
1
  ---
2
+ language:
3
+ - en
4
  tags:
5
  - text-classification
6
+ - emotion
7
  - endpoints-template
8
+ license: apache-2.0
9
+ datasets:
10
+ - emotion
11
+ metrics:
12
+ - Accuracy, F1 Score
13
  ---
14
 
 
15
 
16
+ # Fork of [bhadresh-savani/distilbert-base-uncased-emotion](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "optimum/distilbert-base-uncased-finetuned-banking77",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
@@ -9,174 +9,31 @@
9
  "dropout": 0.1,
10
  "hidden_dim": 3072,
11
  "id2label": {
12
- "0": "activate_my_card",
13
- "1": "age_limit",
14
- "2": "apple_pay_or_google_pay",
15
- "3": "atm_support",
16
- "4": "automatic_top_up",
17
- "5": "balance_not_updated_after_bank_transfer",
18
- "6": "balance_not_updated_after_cheque_or_cash_deposit",
19
- "7": "beneficiary_not_allowed",
20
- "8": "cancel_transfer",
21
- "9": "card_about_to_expire",
22
- "10": "card_acceptance",
23
- "11": "card_arrival",
24
- "12": "card_delivery_estimate",
25
- "13": "card_linking",
26
- "14": "card_not_working",
27
- "15": "card_payment_fee_charged",
28
- "16": "card_payment_not_recognised",
29
- "17": "card_payment_wrong_exchange_rate",
30
- "18": "card_swallowed",
31
- "19": "cash_withdrawal_charge",
32
- "20": "cash_withdrawal_not_recognised",
33
- "21": "change_pin",
34
- "22": "compromised_card",
35
- "23": "contactless_not_working",
36
- "24": "country_support",
37
- "25": "declined_card_payment",
38
- "26": "declined_cash_withdrawal",
39
- "27": "declined_transfer",
40
- "28": "direct_debit_payment_not_recognised",
41
- "29": "disposable_card_limits",
42
- "30": "edit_personal_details",
43
- "31": "exchange_charge",
44
- "32": "exchange_rate",
45
- "33": "exchange_via_app",
46
- "34": "extra_charge_on_statement",
47
- "35": "failed_transfer",
48
- "36": "fiat_currency_support",
49
- "37": "get_disposable_virtual_card",
50
- "38": "get_physical_card",
51
- "39": "getting_spare_card",
52
- "40": "getting_virtual_card",
53
- "41": "lost_or_stolen_card",
54
- "42": "lost_or_stolen_phone",
55
- "43": "order_physical_card",
56
- "44": "passcode_forgotten",
57
- "45": "pending_card_payment",
58
- "46": "pending_cash_withdrawal",
59
- "47": "pending_top_up",
60
- "48": "pending_transfer",
61
- "49": "pin_blocked",
62
- "50": "receiving_money",
63
- "51": "Refund_not_showing_up",
64
- "52": "request_refund",
65
- "53": "reverted_card_payment?",
66
- "54": "supported_cards_and_currencies",
67
- "55": "terminate_account",
68
- "56": "top_up_by_bank_transfer_charge",
69
- "57": "top_up_by_card_charge",
70
- "58": "top_up_by_cash_or_cheque",
71
- "59": "top_up_failed",
72
- "60": "top_up_limits",
73
- "61": "top_up_reverted",
74
- "62": "topping_up_by_card",
75
- "63": "transaction_charged_twice",
76
- "64": "transfer_fee_charged",
77
- "65": "transfer_into_account",
78
- "66": "transfer_not_received_by_recipient",
79
- "67": "transfer_timing",
80
- "68": "unable_to_verify_identity",
81
- "69": "verify_my_identity",
82
- "70": "verify_source_of_funds",
83
- "71": "verify_top_up",
84
- "72": "virtual_card_not_working",
85
- "73": "visa_or_mastercard",
86
- "74": "why_verify_identity",
87
- "75": "wrong_amount_of_cash_received",
88
- "76": "wrong_exchange_rate_for_cash_withdrawal"
89
  },
90
  "initializer_range": 0.02,
91
  "label2id": {
92
- "Refund_not_showing_up": 51,
93
- "activate_my_card": 0,
94
- "age_limit": 1,
95
- "apple_pay_or_google_pay": 2,
96
- "atm_support": 3,
97
- "automatic_top_up": 4,
98
- "balance_not_updated_after_bank_transfer": 5,
99
- "balance_not_updated_after_cheque_or_cash_deposit": 6,
100
- "beneficiary_not_allowed": 7,
101
- "cancel_transfer": 8,
102
- "card_about_to_expire": 9,
103
- "card_acceptance": 10,
104
- "card_arrival": 11,
105
- "card_delivery_estimate": 12,
106
- "card_linking": 13,
107
- "card_not_working": 14,
108
- "card_payment_fee_charged": 15,
109
- "card_payment_not_recognised": 16,
110
- "card_payment_wrong_exchange_rate": 17,
111
- "card_swallowed": 18,
112
- "cash_withdrawal_charge": 19,
113
- "cash_withdrawal_not_recognised": 20,
114
- "change_pin": 21,
115
- "compromised_card": 22,
116
- "contactless_not_working": 23,
117
- "country_support": 24,
118
- "declined_card_payment": 25,
119
- "declined_cash_withdrawal": 26,
120
- "declined_transfer": 27,
121
- "direct_debit_payment_not_recognised": 28,
122
- "disposable_card_limits": 29,
123
- "edit_personal_details": 30,
124
- "exchange_charge": 31,
125
- "exchange_rate": 32,
126
- "exchange_via_app": 33,
127
- "extra_charge_on_statement": 34,
128
- "failed_transfer": 35,
129
- "fiat_currency_support": 36,
130
- "get_disposable_virtual_card": 37,
131
- "get_physical_card": 38,
132
- "getting_spare_card": 39,
133
- "getting_virtual_card": 40,
134
- "lost_or_stolen_card": 41,
135
- "lost_or_stolen_phone": 42,
136
- "order_physical_card": 43,
137
- "passcode_forgotten": 44,
138
- "pending_card_payment": 45,
139
- "pending_cash_withdrawal": 46,
140
- "pending_top_up": 47,
141
- "pending_transfer": 48,
142
- "pin_blocked": 49,
143
- "receiving_money": 50,
144
- "request_refund": 52,
145
- "reverted_card_payment?": 53,
146
- "supported_cards_and_currencies": 54,
147
- "terminate_account": 55,
148
- "top_up_by_bank_transfer_charge": 56,
149
- "top_up_by_card_charge": 57,
150
- "top_up_by_cash_or_cheque": 58,
151
- "top_up_failed": 59,
152
- "top_up_limits": 60,
153
- "top_up_reverted": 61,
154
- "topping_up_by_card": 62,
155
- "transaction_charged_twice": 63,
156
- "transfer_fee_charged": 64,
157
- "transfer_into_account": 65,
158
- "transfer_not_received_by_recipient": 66,
159
- "transfer_timing": 67,
160
- "unable_to_verify_identity": 68,
161
- "verify_my_identity": 69,
162
- "verify_source_of_funds": 70,
163
- "verify_top_up": 71,
164
- "virtual_card_not_working": 72,
165
- "visa_or_mastercard": 73,
166
- "why_verify_identity": 74,
167
- "wrong_amount_of_cash_received": 75,
168
- "wrong_exchange_rate_for_cash_withdrawal": 76
169
  },
170
  "max_position_embeddings": 512,
 
171
  "n_heads": 12,
172
  "n_layers": 6,
173
  "pad_token_id": 0,
174
- "problem_type": "single_label_classification",
175
  "qa_dropout": 0.1,
176
  "seq_classif_dropout": 0.2,
177
  "sinusoidal_pos_embds": false,
178
  "tie_weights_": true,
179
- "torch_dtype": "float32",
180
- "transformers_version": "4.19.2",
181
  "vocab_size": 30522
182
  }
 
1
  {
2
+ "_name_or_path": "./",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
 
9
  "dropout": 0.1,
10
  "hidden_dim": 3072,
11
  "id2label": {
12
+ "0": "sadness",
13
+ "1": "joy",
14
+ "2": "love",
15
+ "3": "anger",
16
+ "4": "fear",
17
+ "5": "surprise"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  },
19
  "initializer_range": 0.02,
20
  "label2id": {
21
+ "anger": 3,
22
+ "fear": 4,
23
+ "joy": 1,
24
+ "love": 2,
25
+ "sadness": 0,
26
+ "surprise": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  "max_position_embeddings": 512,
29
+ "model_type": "distilbert",
30
  "n_heads": 12,
31
  "n_layers": 6,
32
  "pad_token_id": 0,
 
33
  "qa_dropout": 0.1,
34
  "seq_classif_dropout": 0.2,
35
  "sinusoidal_pos_embds": false,
36
  "tie_weights_": true,
37
+ "transformers_version": "4.11.0.dev0",
 
38
  "vocab_size": 30522
39
  }
handler copy.py DELETED
@@ -1,51 +0,0 @@
1
- from typing import Dict, List, Any
2
- # import torch
3
- from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
4
-
5
-
6
- # from optimum.onnxruntime import ORTModelForSequenceClassification
7
- from transformers import AutoModel
8
- # from transformers import AutoModelForSequenceClassification, AutoTokenizer
9
-
10
- from transformers import pipeline, AutoTokenizer
11
-
12
- # checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
13
- checkpoint = "distilbert-base-uncased"
14
-
15
- class EndpointHandler():
16
-
17
- def __init__(self, path=""):
18
- # load the optimized model
19
- # model = ORTModelForSequenceClassification.from_pretrained(path)
20
- # model = AutoModel.from_pretrained(checkpoint)
21
- # model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
22
-
23
- # tokenizer = AutoTokenizer.from_pretrained(path)
24
- # tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=checkpoint)
25
- model = DistilBertForSequenceClassification.from_pretrained(checkpoint)
26
- tokenizer = DistilBertTokenizer.from_pretrained(checkpoint)
27
-
28
- # create inference pipeline
29
- self.pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
30
-
31
-
32
- def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
33
- """
34
- Args:
35
- data (:obj:):
36
- includes the input data and the parameters for the inference.
37
- Return:
38
- A :obj:`list`:. The object returned should be a list of one list like [[{"label": 0.9939950108528137}]] containing :
39
- - "label": A string representing what the label/class is. There can be multiple labels.
40
- - "score": A score between 0 and 1 describing how confident the model is for this label/class.
41
- """
42
- inputs = data.pop("inputs", data)
43
- parameters = data.pop("parameters", None)
44
-
45
- # pass inputs with all kwargs in data
46
- if parameters is not None:
47
- prediction = self.pipeline(inputs, **parameters)
48
- else:
49
- prediction = self.pipeline(inputs)
50
- # postprocess the prediction
51
- return prediction
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
handler.py CHANGED
@@ -1,9 +1,30 @@
1
- from typing import Dict, List, Any
 
 
2
 
3
- class EndpointHandler():
 
4
  def __init__(self, path=""):
5
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
 
 
 
7
 
8
- def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
9
- return 42
 
 
1
+ from typing import Dict, List, Any
2
+ from transformers import pipeline
3
+ import holidays
4
 
5
+
6
+ class EndpointHandler:
7
  def __init__(self, path=""):
8
+ self.pipeline = pipeline("text-classification", model=path)
9
+ self.holidays = holidays.US()
10
+
11
+ def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
12
+ """
13
+ data args:
14
+ inputs (:obj: `str`)
15
+ date (:obj: `str`)
16
+ Return:
17
+ A :obj:`list` | `dict`: will be serialized and returned
18
+ """
19
+ # get inputs
20
+ inputs = data.pop("inputs", data)
21
+ # get additional date field
22
+ date = data.pop("date", None)
23
 
24
+ # check if date exists and if it is a holiday
25
+ if date is not None and date in self.holidays:
26
+ return [{"label": "happy", "score": 1}]
27
 
28
+ # run normal prediction
29
+ prediction = self.pipeline(inputs)
30
+ return prediction
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5aa7398d830fcc94f95af88d7cc3013813668cfc58a07d75a8116cfd8af75c4d
3
+ size 267875479
requirements.txt CHANGED
@@ -1,9 +1 @@
1
- optimum[onnxruntime]==1.2.3
2
- mkl-include
3
- mkl
4
- #pipeline
5
- # spacy
6
- transformers
7
- datasets
8
- evaluate
9
- # torch
 
1
+ holidays
 
 
 
 
 
 
 
 
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "onnx", "tokenizer_class": "DistilBertTokenizer"}
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased"}