MSLacerda/attribute_mining_mslacerda

Browse files

Files changed (12) hide show

README.md +77 -0
config.json +130 -0
model.safetensors +3 -0
pytorch_model.bin +3 -0
runs/Jun04_02-55-04_c7cfbdc033d7/events.out.tfevents.1717469708.c7cfbdc033d7 +3 -0
runs/Jun04_02-56-39_c7cfbdc033d7/events.out.tfevents.1717469803.c7cfbdc033d7 +3 -0
runs/Jun04_02-56-39_c7cfbdc033d7/events.out.tfevents.1717471313.c7cfbdc033d7 +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +55 -0
training_args.bin +3 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,77 @@

+---
+license: apache-2.0
+base_model: distilbert/distilbert-base-uncased
+tags:
+- generated_from_trainer
+metrics:
+- precision
+- recall
+- f1
+- accuracy
+model-index:
+- name: attribute_mining_mslacerda
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# attribute_mining_mslacerda
+This model is a fine-tuned version of [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.3079
+- Precision: 0.6199
+- Recall: 0.5775
+- F1: 0.5979
+- Accuracy: 0.6640
+## Model description
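+A DistilBERT token-classification model (`DistilBertForTokenClassification`, fine-tuned from `distilbert/distilbert-base-uncased`) that assigns each token one of 51 attribute labels (e.g. Brand, Flavor, Color, Size, Material). The full label set is listed under `id2label` in `config.json`.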
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 512
+- eval_batch_size: 128
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 12
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
+|:-------------:|:-----:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+| No log        | 1.0   | 3    | 1.6782          | 0.5377    | 0.5324 | 0.5350 | 0.5856   |
+| No log        | 2.0   | 6    | 1.6105          | 0.5505    | 0.5299 | 0.5400 | 0.5943   |
+| No log        | 3.0   | 9    | 1.5469          | 0.5696    | 0.5399 | 0.5543 | 0.6084   |
+| No log        | 4.0   | 12   | 1.4938          | 0.5799    | 0.5457 | 0.5623 | 0.6198   |
+| No log        | 5.0   | 15   | 1.4528          | 0.5854    | 0.5441 | 0.5639 | 0.6264   |
+| No log        | 6.0   | 18   | 1.4109          | 0.5972    | 0.5578 | 0.5769 | 0.6381   |
+| No log        | 7.0   | 21   | 1.3794          | 0.6055    | 0.5633 | 0.5836 | 0.6465   |
+| No log        | 8.0   | 24   | 1.3563          | 0.6162    | 0.5678 | 0.5910 | 0.6523   |
+| No log        | 9.0   | 27   | 1.3380          | 0.6153    | 0.5683 | 0.5908 | 0.6550   |
+| No log        | 10.0  | 30   | 1.3226          | 0.6162    | 0.5712 | 0.5928 | 0.6592   |
+| No log        | 11.0  | 33   | 1.3120          | 0.6186    | 0.5749 | 0.5960 | 0.6628   |
+| No log        | 12.0  | 36   | 1.3079          | 0.6199    | 0.5775 | 0.5979 | 0.6640   |
+### Framework versions
+- Transformers 4.41.2
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.2
+- Tokenizers 0.19.1

config.json ADDED Viewed

	@@ -0,0 +1,130 @@

+{
+  "_name_or_path": "distilbert/distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForTokenClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "Brand",
+    "1": "Item form",
+    "2": "Specialty",
+    "3": "Flavor",
+    "4": "Net content",
+    "5": "Pack size",
+    "6": "Roast type",
+    "7": "Caffeine content",
+    "8": "Machine type",
+    "9": "Container type",
+    "10": "Country",
+    "11": "Gender",
+    "12": "Model name",
+    "13": "Shoe type",
+    "14": "Color",
+    "15": "Size",
+    "16": "Material",
+    "17": "Closure",
+    "18": "Sport",
+    "19": "Age",
+    "20": "Waterproof",
+    "21": "Power source",
+    "22": "Cap",
+    "23": "Operation mode",
+    "24": "Firmness",
+    "25": "Animal",
+    "26": "Volume",
+    "27": "Scent",
+    "28": "Benefit",
+    "29": "Hair type",
+    "30": "Supplement type",
+    "31": "Health benefit",
+    "32": "Dosage",
+    "33": "Diet",
+    "34": "Supply size",
+    "35": "Administration type",
+    "36": "Load size",
+    "37": "Specific uses",
+    "38": "Skin type",
+    "39": "Tea variety",
+    "40": "Protection level",
+    "41": "Mask type",
+    "42": "Age range",
+    "43": "Reusability",
+    "44": "Layer",
+    "45": "Closure type",
+    "46": "Package type",
+    "47": "Certified grade",
+    "48": "Container",
+    "49": "Sub brand",
+    "50": "Cereal type"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "Administration type": 35,
+    "Age": 19,
+    "Age range": 42,
+    "Animal": 25,
+    "Benefit": 28,
+    "Brand": 0,
+    "Caffeine content": 7,
+    "Cap": 22,
+    "Cereal type": 50,
+    "Certified grade": 47,
+    "Closure": 17,
+    "Closure type": 45,
+    "Color": 14,
+    "Container": 48,
+    "Container type": 9,
+    "Country": 10,
+    "Diet": 33,
+    "Dosage": 32,
+    "Firmness": 24,
+    "Flavor": 3,
+    "Gender": 11,
+    "Hair type": 29,
+    "Health benefit": 31,
+    "Item form": 1,
+    "Layer": 44,
+    "Load size": 36,
+    "Machine type": 8,
+    "Mask type": 41,
+    "Material": 16,
+    "Model name": 12,
+    "Net content": 4,
+    "Operation mode": 23,
+    "Pack size": 5,
+    "Package type": 46,
+    "Power source": 21,
+    "Protection level": 40,
+    "Reusability": 43,
+    "Roast type": 6,
+    "Scent": 27,
+    "Shoe type": 13,
+    "Size": 15,
+    "Skin type": 38,
+    "Specialty": 2,
+    "Specific uses": 37,
+    "Sport": 18,
+    "Sub brand": 49,
+    "Supplement type": 30,
+    "Supply size": 34,
+    "Tea variety": 39,
+    "Volume": 26,
+    "Waterproof": 20
+  },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "vocab_size": 30522
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb7bbb00792e6f5d7827b1d0ed9e788e5be03e6d3127ab695656240f12402442
+size 265620748

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:446eab74a72ac8b3468b3937c6538088c54463a89de054c90a8ed4de037412c3
+size 265649442

runs/Jun04_02-55-04_c7cfbdc033d7/events.out.tfevents.1717469708.c7cfbdc033d7 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e06b292488328ff27560d92b2a5e125840cd7448d998e27ecc5724b4d700a50
+size 16358

runs/Jun04_02-56-39_c7cfbdc033d7/events.out.tfevents.1717469803.c7cfbdc033d7 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47c5d7b8eeb49668b49488bbd4f90c47eb6a5924a81ba823976353200e240011
+size 12966

runs/Jun04_02-56-39_c7cfbdc033d7/events.out.tfevents.1717471313.c7cfbdc033d7 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45432b251acb2d68074c1c6f63b838c84d16e0c909ba2d2273acf235da0abad4
+size 503

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0367ac454c92d5b2220f6515aaadcf8eabdad96e83eddca3cb1d02fe6b531542
+size 5176

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff