42

Browse files

Files changed (9) hide show

.gitattributes +5 -1
README.md +9 -17
config.json +15 -158
handler copy.py +0 -51
handler.py +26 -5
pytorch_model.bin +3 -0
requirements.txt +1 -9
tokenizer.json +0 -0
tokenizer_config.json +1 -1

.gitattributes CHANGED Viewed

@@ -9,10 +9,14 @@
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
@@ -23,5 +27,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
-*.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,24 +1,16 @@
 ---
 tags:
 - text-classification
 - endpoints-template
-- optimum
-library_name: generic
 ---
-# Optimized and Quantized DistilBERT with a custom pipeline with handler.py
-> NOTE: Blog post coming soon
-This is a template repository for Text Classification using Optimum and onnxruntime to support generic inference with Hugging Face Hub generic Inference API. There are two required steps:
-1. Specify the requirements by defining a `requirements.txt` file.
-2. Implement the `handler.py` `__init__` and `__call__` methods. These methods are called by the Inference API. The `__init__` method should load the model and preload the optimum model and tokenizers as well as the `text-classification` pipeline needed for inference. This is only called once. The `__call__` method performs the actual inference. Make sure to follow the same input/output specifications defined in the template for the pipeline to work.
-add
-```
-library_name: generic
-```
-to the readme.
-_note: the `generic` community image currently only support `inputs` as parameter and no parameter._

 ---
+language:
+- en
 tags:
 - text-classification
+- emotion
 - endpoints-template
+license: apache-2.0
+datasets:
+- emotion
+metrics:
+- Accuracy, F1 Score
 ---
+# Fork of [bhadresh-savani/distilbert-base-uncased-emotion](https://huggingface.co/bhadresh-savani/distilbert-base-uncased-emotion)

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "optimum/distilbert-base-uncased-finetuned-banking77",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"
@@ -9,174 +9,31 @@
   "dropout": 0.1,
   "hidden_dim": 3072,
   "id2label": {
-    "0": "activate_my_card",
-    "1": "age_limit",
-    "2": "apple_pay_or_google_pay",
-    "3": "atm_support",
-    "4": "automatic_top_up",
-    "5": "balance_not_updated_after_bank_transfer",
-    "6": "balance_not_updated_after_cheque_or_cash_deposit",
-    "7": "beneficiary_not_allowed",
-    "8": "cancel_transfer",
-    "9": "card_about_to_expire",
-    "10": "card_acceptance",
-    "11": "card_arrival",
-    "12": "card_delivery_estimate",
-    "13": "card_linking",
-    "14": "card_not_working",
-    "15": "card_payment_fee_charged",
-    "16": "card_payment_not_recognised",
-    "17": "card_payment_wrong_exchange_rate",
-    "18": "card_swallowed",
-    "19": "cash_withdrawal_charge",
-    "20": "cash_withdrawal_not_recognised",
-    "21": "change_pin",
-    "22": "compromised_card",
-    "23": "contactless_not_working",
-    "24": "country_support",
-    "25": "declined_card_payment",
-    "26": "declined_cash_withdrawal",
-    "27": "declined_transfer",
-    "28": "direct_debit_payment_not_recognised",
-    "29": "disposable_card_limits",
-    "30": "edit_personal_details",
-    "31": "exchange_charge",
-    "32": "exchange_rate",
-    "33": "exchange_via_app",
-    "34": "extra_charge_on_statement",
-    "35": "failed_transfer",
-    "36": "fiat_currency_support",
-    "37": "get_disposable_virtual_card",
-    "38": "get_physical_card",
-    "39": "getting_spare_card",
-    "40": "getting_virtual_card",
-    "41": "lost_or_stolen_card",
-    "42": "lost_or_stolen_phone",
-    "43": "order_physical_card",
-    "44": "passcode_forgotten",
-    "45": "pending_card_payment",
-    "46": "pending_cash_withdrawal",
-    "47": "pending_top_up",
-    "48": "pending_transfer",
-    "49": "pin_blocked",
-    "50": "receiving_money",
-    "51": "Refund_not_showing_up",
-    "52": "request_refund",
-    "53": "reverted_card_payment?",
-    "54": "supported_cards_and_currencies",
-    "55": "terminate_account",
-    "56": "top_up_by_bank_transfer_charge",
-    "57": "top_up_by_card_charge",
-    "58": "top_up_by_cash_or_cheque",
-    "59": "top_up_failed",
-    "60": "top_up_limits",
-    "61": "top_up_reverted",
-    "62": "topping_up_by_card",
-    "63": "transaction_charged_twice",
-    "64": "transfer_fee_charged",
-    "65": "transfer_into_account",
-    "66": "transfer_not_received_by_recipient",
-    "67": "transfer_timing",
-    "68": "unable_to_verify_identity",
-    "69": "verify_my_identity",
-    "70": "verify_source_of_funds",
-    "71": "verify_top_up",
-    "72": "virtual_card_not_working",
-    "73": "visa_or_mastercard",
-    "74": "why_verify_identity",
-    "75": "wrong_amount_of_cash_received",
-    "76": "wrong_exchange_rate_for_cash_withdrawal"
   },
   "initializer_range": 0.02,
   "label2id": {
-    "Refund_not_showing_up": 51,
-    "activate_my_card": 0,
-    "age_limit": 1,
-    "apple_pay_or_google_pay": 2,
-    "atm_support": 3,
-    "automatic_top_up": 4,
-    "balance_not_updated_after_bank_transfer": 5,
-    "balance_not_updated_after_cheque_or_cash_deposit": 6,
-    "beneficiary_not_allowed": 7,
-    "cancel_transfer": 8,
-    "card_about_to_expire": 9,
-    "card_acceptance": 10,
-    "card_arrival": 11,
-    "card_delivery_estimate": 12,
-    "card_linking": 13,
-    "card_not_working": 14,
-    "card_payment_fee_charged": 15,
-    "card_payment_not_recognised": 16,
-    "card_payment_wrong_exchange_rate": 17,
-    "card_swallowed": 18,
-    "cash_withdrawal_charge": 19,
-    "cash_withdrawal_not_recognised": 20,
-    "change_pin": 21,
-    "compromised_card": 22,
-    "contactless_not_working": 23,
-    "country_support": 24,
-    "declined_card_payment": 25,
-    "declined_cash_withdrawal": 26,
-    "declined_transfer": 27,
-    "direct_debit_payment_not_recognised": 28,
-    "disposable_card_limits": 29,
-    "edit_personal_details": 30,
-    "exchange_charge": 31,
-    "exchange_rate": 32,
-    "exchange_via_app": 33,
-    "extra_charge_on_statement": 34,
-    "failed_transfer": 35,
-    "fiat_currency_support": 36,
-    "get_disposable_virtual_card": 37,
-    "get_physical_card": 38,
-    "getting_spare_card": 39,
-    "getting_virtual_card": 40,
-    "lost_or_stolen_card": 41,
-    "lost_or_stolen_phone": 42,
-    "order_physical_card": 43,
-    "passcode_forgotten": 44,
-    "pending_card_payment": 45,
-    "pending_cash_withdrawal": 46,
-    "pending_top_up": 47,
-    "pending_transfer": 48,
-    "pin_blocked": 49,
-    "receiving_money": 50,
-    "request_refund": 52,
-    "reverted_card_payment?": 53,
-    "supported_cards_and_currencies": 54,
-    "terminate_account": 55,
-    "top_up_by_bank_transfer_charge": 56,
-    "top_up_by_card_charge": 57,
-    "top_up_by_cash_or_cheque": 58,
-    "top_up_failed": 59,
-    "top_up_limits": 60,
-    "top_up_reverted": 61,
-    "topping_up_by_card": 62,
-    "transaction_charged_twice": 63,
-    "transfer_fee_charged": 64,
-    "transfer_into_account": 65,
-    "transfer_not_received_by_recipient": 66,
-    "transfer_timing": 67,
-    "unable_to_verify_identity": 68,
-    "verify_my_identity": 69,
-    "verify_source_of_funds": 70,
-    "verify_top_up": 71,
-    "virtual_card_not_working": 72,
-    "visa_or_mastercard": 73,
-    "why_verify_identity": 74,
-    "wrong_amount_of_cash_received": 75,
-    "wrong_exchange_rate_for_cash_withdrawal": 76
   },
   "max_position_embeddings": 512,
   "n_heads": 12,
   "n_layers": 6,
   "pad_token_id": 0,
-  "problem_type": "single_label_classification",
   "qa_dropout": 0.1,
   "seq_classif_dropout": 0.2,
   "sinusoidal_pos_embds": false,
   "tie_weights_": true,
-  "torch_dtype": "float32",
-  "transformers_version": "4.19.2",
   "vocab_size": 30522
 }

 {
+  "_name_or_path": "./",
   "activation": "gelu",
   "architectures": [
     "DistilBertForSequenceClassification"
   "dropout": 0.1,
   "hidden_dim": 3072,
   "id2label": {
+    "0": "sadness",
+    "1": "joy",
+    "2": "love",
+    "3": "anger",
+    "4": "fear",
+    "5": "surprise"
   },
   "initializer_range": 0.02,
   "label2id": {
+    "anger": 3,
+    "fear": 4,
+    "joy": 1,
+    "love": 2,
+    "sadness": 0,
+    "surprise": 5
   },
   "max_position_embeddings": 512,
+  "model_type": "distilbert",
   "n_heads": 12,
   "n_layers": 6,
   "pad_token_id": 0,
   "qa_dropout": 0.1,
   "seq_classif_dropout": 0.2,
   "sinusoidal_pos_embds": false,
   "tie_weights_": true,
+  "transformers_version": "4.11.0.dev0",
   "vocab_size": 30522
 }

handler copy.py DELETED Viewed

@@ -1,51 +0,0 @@
-from typing import  Dict, List, Any
-# import torch
-from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
-# from optimum.onnxruntime import ORTModelForSequenceClassification
-from transformers import AutoModel
-# from transformers import AutoModelForSequenceClassification, AutoTokenizer
-from transformers import pipeline, AutoTokenizer
-# checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-checkpoint = "distilbert-base-uncased"
-class EndpointHandler():
-    def __init__(self, path=""):
-        # load the optimized model
-        # model = ORTModelForSequenceClassification.from_pretrained(path)
-        # model = AutoModel.from_pretrained(checkpoint)
-        # model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-        # tokenizer = AutoTokenizer.from_pretrained(path)
-        # tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=checkpoint)
-        model = DistilBertForSequenceClassification.from_pretrained(checkpoint)
-        tokenizer = DistilBertTokenizer.from_pretrained(checkpoint)
-        # create inference pipeline
-        self.pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer)
-    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
-        """
-        Args:
-            data (:obj:):
-                includes the input data and the parameters for the inference.
-        Return:
-            A :obj:`list`:. The object returned should be a list of one list like [[{"label": 0.9939950108528137}]] containing :
-                - "label": A string representing what the label/class is. There can be multiple labels.
-                - "score": A score between 0 and 1 describing how confident the model is for this label/class.
-        """
-        inputs = data.pop("inputs", data)
-        parameters = data.pop("parameters", None)
-        # pass inputs with all kwargs in data
-        if parameters is not None:
-            prediction = self.pipeline(inputs, **parameters)
-        else:
-            prediction = self.pipeline(inputs)
-        # postprocess the prediction
-        return prediction

handler.py CHANGED Viewed

@@ -1,9 +1,30 @@
-from typing import  Dict, List, Any
-class EndpointHandler():
     def __init__(self, path=""):
-        pass
-    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
-        return 42

+from typing import Dict, List, Any
+from transformers import pipeline
+import holidays
+class EndpointHandler:
     def __init__(self, path=""):
+        self.pipeline = pipeline("text-classification", model=path)
+        self.holidays = holidays.US()
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+         data args:
+              inputs (:obj: `str`)
+              date (:obj: `str`)
+        Return:
+              A :obj:`list` | `dict`: will be serialized and returned
+        """
+        # get inputs
+        inputs = data.pop("inputs", data)
+        # get additional date field
+        date = data.pop("date", None)
+        # check if date exists and if it is a holiday
+        if date is not None and date in self.holidays:
+            return [{"label": "happy", "score": 1}]
+        # run normal prediction
+        prediction = self.pipeline(inputs)
+        return prediction

pytorch_model.bin ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5aa7398d830fcc94f95af88d7cc3013813668cfc58a07d75a8116cfd8af75c4d
+size 267875479

requirements.txt CHANGED Viewed

@@ -1,9 +1 @@
-optimum[onnxruntime]==1.2.3
-mkl-include
-mkl
-#pipeline
-# spacy
-transformers
-datasets
-evaluate
-# torch


+holidays

tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1 +1 @@
-{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "onnx", "tokenizer_class": "DistilBertTokenizer"}


+{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased"}