commit files to HF hub

Browse files

Files changed (10) hide show

all_results.json +8 -0
best_configure.yaml +539 -0
config.json +35 -0
eval_results.json +8 -0
pytorch_model.bin +3 -0
special_tokens_map.json +1 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -0
training_args.bin +3 -0
vocab.txt +0 -0

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.0,
+    "eval_accuracy": 0.9140625,
+    "eval_loss": 0.4163683354854584,
+    "eval_runtime": 7.4147,
+    "eval_samples_per_second": 34.526,
+    "eval_steps_per_second": 4.316
+}

best_configure.yaml ADDED Viewed

	@@ -0,0 +1,539 @@

+advance: null
+approach: post_training_dynamic_quant
+bf16_ops_list: []
+calib_iteration: 1
+calib_sampling_size: 100
+framework: pytorch
+op:
+  ? !!python/tuple
+  - distilbert.transformer.layer.0.attention.q_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.0.attention.k_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.0.attention.v_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.0.attention.out_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.0.ffn.lin1
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.0.ffn.lin2
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.1.attention.q_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.1.attention.k_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.1.attention.v_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.1.attention.out_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.1.ffn.lin1
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.1.ffn.lin2
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.2.attention.q_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.2.attention.k_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.2.attention.v_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.2.attention.out_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.2.ffn.lin1
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.2.ffn.lin2
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.3.attention.q_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.3.attention.k_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.3.attention.v_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.3.attention.out_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.3.ffn.lin1
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.3.ffn.lin2
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.4.attention.q_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.4.attention.k_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.4.attention.v_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.4.attention.out_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.4.ffn.lin1
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.4.ffn.lin2
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.5.attention.q_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.5.attention.k_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.5.attention.v_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.5.attention.out_lin
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.5.ffn.lin1
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - distilbert.transformer.layer.5.ffn.lin2
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - pre_classifier
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax
+  ? !!python/tuple
+  - classifier
+  - Linear
+  : weight:
+      dtype: int8
+      scheme: sym
+      granularity: per_channel
+      algorithm: minmax
+      bit: 7.0
+    activation:
+      dtype: uint8
+      scheme: asym
+      granularity: per_tensor
+      algorithm: minmax

config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "finetuning_task": "sst-2",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "NEGATIVE",
+    "1": "POSITIVE"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "NEGATIVE": 0,
+    "POSITIVE": 1
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.16.2",
+  "vocab_size": 30522
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 2.0,
+    "eval_accuracy": 0.9140625,
+    "eval_loss": 0.4163683354854584,
+    "eval_runtime": 7.4147,
+    "eval_samples_per_second": 34.526,
+    "eval_steps_per_second": 4.316
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c26bbb1c8fe2e14ebb47bb01a181fbecbc9be9701eebca44aa7aa59af26fa0d
+size 139407425

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "distilbert-base-uncased-finetuned-sst-2-english", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "DistilBertTokenizer"}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:517b5da8c631bd38130817f6712c8388a1c2674d4b6c2214a2d0ba5cd8999187
+size 2991

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff