Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

README.md +64 -0
config.json +24 -0
generation_config.json +5 -0
pytorch_model.bin +3 -0
special_tokens_map.json +7 -0
tokenizer.json +218 -0
tokenizer_config.json +15 -0

README.md CHANGED Viewed

@@ -1,3 +1,67 @@
 ---
 license: mit
 ---

 ---
 license: mit
+tags:
+- automated-planning
+- masked-language-modeling
+- bert
+---
+# BERT for Automated Planning (Depots)
+This is a BERT model pretrained with a Masked Language Modelling (MLM) objective, developed specifically for Automated Planning tasks in the Depots domain.
+You can find its full description, methodology, and experimental results in our paper: **[A Preliminary Study on BERT applied to Automated Planning](https://ceur-ws.org/Vol-3345/paper7_3460.pdf)**.
+## Usage
+You can easily load the model and the tokenizer using the Hugging Face `transformers` library:
+```python
+from transformers import BertForMaskedLM, PreTrainedTokenizerFast
+tokenizer = PreTrainedTokenizerFast.from_pretrained("lore-seri97/bert-depots")
+model = BertForMaskedLM.from_pretrained("lore-seri97/bert-depots")
+```
+## Citation
+If you use this model in your research, please cite our work using the following BibTeX entry:
+```bibtex
+@inproceedings{DBLP:conf/aiia/SerinaCGPS22,
+  author       = {Lorenzo Serina and
+                  Mattia Chiari and
+                  Alfonso Emilio Gerevini and
+                  Luca Putelli and
+                  Ivan Serina},
+  editor       = {Riccardo De Benedictis and
+                  Nicola Gatti and
+                  Marco Maratea and
+                  Andrea Micheli and
+                  Aniello Murano and
+                  Enrico Scala and
+                  Luciano Serafini and
+                  Ivan Serina and
+                  Alessandro Umbrico and
+                  Mauro Vallati},
+  title        = {A Preliminary Study on {BERT} applied to Automated Planning},
+  booktitle    = {Proceedings of the 10th Italian workshop on Planning and Scheduling
+                  {(IPS} 2022), {RCRA} Incontri {E} Confronti (RiCeRcA 2022), and the
+                  workshop on Strategies, Prediction, Interaction, and Reasoning in
+                  Italy {(SPIRIT} 2022) co-located with 21st International Conference
+                  of the Italian Association for Artificial Intelligence (AIxIA 2022),
+                  November 28 - December 2, 2022, University of Udine, Udine, Italy},
+  series       = {{CEUR} Workshop Proceedings},
+  volume       = {3345},
+  publisher    = {CEUR-WS.org},
+  year         = {2022},
+  url          = {https://ceur-ws.org/Vol-3345/paper7_3460.pdf},
+  timestamp    = {Fri, 10 Mar 2023 16:23:01 +0100},
+  biburl       = {https://dblp.org/rec/conf/aiia/SerinaCGPS22.bib},
+  bibsource    = {dblp computer science bibliography, https://dblp.org}
+}
+```
 ---

config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 8,
+  "num_hidden_layers": 8,
+  "pad_token_id": 3,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.0.dev0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 51
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "_from_model_config": true,
+  "pad_token_id": 3,
+  "transformers_version": "4.30.0.dev0"
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1ff6b12c6efd9cde0ff9d74abd323dc97d6705e999f0db18b585e32b8203dab
+size 180609465

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,218 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 0,
+      "content": "[UNK]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 1,
+      "content": "[CLS]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 2,
+      "content": "[SEP]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 3,
+      "content": "[PAD]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 4,
+      "content": "[MASK]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 5,
+      "content": "[A]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 6,
+      "content": "[SI]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 7,
+      "content": "[SG]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": {
+    "type": "Lowercase"
+  },
+  "pre_tokenizer": {
+    "type": "WhitespaceSplit"
+  },
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "[CLS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "[CLS]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[SEP]",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "[CLS]": {
+        "id": "[CLS]",
+        "ids": [
+          1
+        ],
+        "tokens": [
+          "[CLS]"
+        ]
+      },
+      "[SEP]": {
+        "id": "[SEP]",
+        "ids": [
+          2
+        ],
+        "tokens": [
+          "[SEP]"
+        ]
+      }
+    }
+  },
+  "decoder": null,
+  "model": {
+    "type": "WordLevel",
+    "vocab": {
+      "[UNK]": 0,
+      "[CLS]": 1,
+      "[SEP]": 2,
+      "[PAD]": 3,
+      "[MASK]": 4,
+      "[A]": 5,
+      "[SI]": 6,
+      "[SG]": 7,
+      "on": 8,
+      "at": 9,
+      "lift": 10,
+      "distributor2": 11,
+      "distributor1": 12,
+      "distributor0": 13,
+      "drop": 14,
+      "depot1": 15,
+      "depot2": 16,
+      "depot0": 17,
+      "crate9": 18,
+      "crate2": 19,
+      "crate1": 20,
+      "crate0": 21,
+      "crate6": 22,
+      "crate8": 23,
+      "crate4": 24,
+      "crate5": 25,
+      "load": 26,
+      "crate3": 27,
+      "crate7": 28,
+      "truck1": 29,
+      "truck2": 30,
+      "truck0": 31,
+      "unload": 32,
+      "hoist1": 33,
+      "hoist2": 34,
+      "hoist5": 35,
+      "hoist3": 36,
+      "hoist0": 37,
+      "hoist4": 38,
+      "drive": 39,
+      "clear": 40,
+      "available": 41,
+      "pallet5": 42,
+      "pallet2": 43,
+      "pallet3": 44,
+      "pallet1": 45,
+      "pallet4": 46,
+      "pallet0": 47,
+      "[a]": 48,
+      "[sg]": 49,
+      "[si]": 50
+    },
+    "unk_token": "[UNK]"
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "action_token": "[A]",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "goal_token": "[SG]",
+  "init_token": "[SI]",
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "padding": "max_length",
+  "sep_token": "[SEP]",
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation": true,
+  "unk_token": "[UNK]"
+}