Upload trained LLM router with quantized ONNX support

Files changed (9)

README.md CHANGED Viewed

@@ -15,7 +15,7 @@ You can install it with `pip install adaptive-classifier`.
 ## Model Details
-- Base Model: benchmark_results/onnx/
 - Number of Classes: 2
 - Total Examples: 10
 - Embedding Dimension: 768

 ## Model Details
+- Base Model: distilbert/distilbert-base-cased
 - Number of Classes: 2
 - Total Examples: 10
 - Embedding Dimension: 768

config.json CHANGED Viewed

@@ -39,7 +39,7 @@
     "HIGH": 0,
     "LOW": 1
   },
-  "model_name": "benchmark_results/onnx/",
   "train_steps": 3688,
   "training_history": {
     "HIGH": 29504,

     "HIGH": 0,
     "LOW": 1
   },
+  "model_name": "distilbert/distilbert-base-cased",
   "train_steps": 3688,
   "training_history": {
     "HIGH": 29504,

onnx/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "benchmark_results/onnx/",
   "activation": "gelu",
   "architectures": [
     "DistilBertForMaskedLM"

 {
+  "_name_or_path": "distilbert/distilbert-base-cased",
   "activation": "gelu",
   "architectures": [
     "DistilBertForMaskedLM"

onnx/model_quantized.onnx ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:fa65416faff96f9f5e2b464d104cc2de5ded732a66e36dc55b391a32c1af3690
+size 65598662

onnx/ort_config.json ADDED Viewed

+{
+  "one_external_file": true,
+  "opset": null,
+  "optimization": {},
+  "quantization": {
+    "activations_dtype": "QUInt8",
+    "activations_symmetric": false,
+    "format": "QOperator",
+    "is_static": false,
+    "mode": "IntegerOps",
+    "nodes_to_exclude": [],
+    "nodes_to_quantize": [],
+    "operators_to_quantize": [
+      "Conv",
+      "MatMul",
+      "Attention",
+      "LSTM",
+      "Gather",
+      "Transpose",
+      "EmbedLayerNormalization"
+    ],
+    "per_channel": false,
+    "qdq_add_pair_to_weight": false,
+    "qdq_dedicated_pair": false,
+    "qdq_op_type_per_channel_support_to_axis": {
+      "MatMul": 1
+    },
+    "reduce_range": false,
+    "weights_dtype": "QInt8",
+    "weights_symmetric": true
+  },
+  "use_external_data_format": false
+}

onnx/special_tokens_map.json ADDED Viewed

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

onnx/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

onnx/tokenizer_config.json ADDED Viewed

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

onnx/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff