jglaser committed
Commit
6d771a8
1 Parent(s): 50a35e3

Add new SentenceTransformer model.

Files changed (35)
  1. .gitattributes +3 -0
  2. 0_Asym/140438116837408_Transformer/config.json +27 -0
  3. 0_Asym/140438116837408_Transformer/pytorch_model.bin +3 -0
  4. 0_Asym/140438116837408_Transformer/sentence_bert_config.json +4 -0
  5. 0_Asym/140438116837408_Transformer/special_tokens_map.json +1 -0
  6. 0_Asym/140438116837408_Transformer/tokenizer.json +364 -0
  7. 0_Asym/140438116837408_Transformer/tokenizer_config.json +1 -0
  8. 0_Asym/140438116837408_Transformer/vocab.txt +201 -0
  9. 0_Asym/140442693121648_Pooling/config.json +7 -0
  10. 0_Asym/140442693122032_Dense/config.json +1 -0
  11. 0_Asym/140442693122032_Dense/pytorch_model.bin +3 -0
  12. 0_Asym/140442693122080_Pooling/config.json +7 -0
  13. 0_Asym/140442694969664_Dense/config.json +1 -0
  14. 0_Asym/140442694969664_Dense/pytorch_model.bin +3 -0
  15. 0_Asym/140442697524080_Transformer/config.json +26 -0
  16. 0_Asym/140442697524080_Transformer/pytorch_model.bin +3 -0
  17. 0_Asym/140442697524080_Transformer/sentence_bert_config.json +4 -0
  18. 0_Asym/140442697524080_Transformer/special_tokens_map.json +1 -0
  19. 0_Asym/140442697524080_Transformer/tokenizer.json +197 -0
  20. 0_Asym/140442697524080_Transformer/tokenizer_config.json +1 -0
  21. 0_Asym/140442697524080_Transformer/vocab.txt +30 -0
  22. 0_Asym/config.json +25 -0
  23. 1_Dense/config.json +1 -0
  24. 1_Dense/pytorch_model.bin +3 -0
  25. 2_Dense/config.json +1 -0
  26. 2_Dense/pytorch_model.bin +3 -0
  27. 3_Dense/config.json +1 -0
  28. 3_Dense/pytorch_model.bin +3 -0
  29. 4_Dense/config.json +1 -0
  30. 4_Dense/pytorch_model.bin +3 -0
  31. 5_Dense/config.json +1 -0
  32. 5_Dense/pytorch_model.bin +3 -0
  33. README.md +65 -0
  34. config_sentence_transformers.json +7 -0
  35. modules.json +38 -0
.gitattributes CHANGED
@@ -25,3 +25,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ 0_Asym/140442697524080_Transformer/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
29
+ 0_Asym/140438116837408_Transformer/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
30
+ 1_Dense/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
0_Asym/140438116837408_Transformer/config.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "_name_or_path": "/netdisk/xvg/smiles_3",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "tokenizer_class": "BertTokenizerFast",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.17.0.dev0",
24
+ "type_vocab_size": 2,
25
+ "use_cache": true,
26
+ "vocab_size": 30000
27
+ }
0_Asym/140438116837408_Transformer/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8feac2b61625c9b69a34f996f33a3198c82c5eea062dc14e82e019b0b23178a5
3
+ size 436404785
0_Asym/140438116837408_Transformer/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
0_Asym/140438116837408_Transformer/special_tokens_map.json ADDED
@@ -0,0 +1 @@
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
0_Asym/140438116837408_Transformer/tokenizer.json ADDED
@@ -0,0 +1,364 @@
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "special": true,
9
+ "content": "[PAD]",
10
+ "single_word": false,
11
+ "lstrip": false,
12
+ "rstrip": false,
13
+ "normalized": false
14
+ },
15
+ {
16
+ "id": 1,
17
+ "special": true,
18
+ "content": "[UNK]",
19
+ "single_word": false,
20
+ "lstrip": false,
21
+ "rstrip": false,
22
+ "normalized": false
23
+ },
24
+ {
25
+ "id": 2,
26
+ "special": true,
27
+ "content": "[CLS]",
28
+ "single_word": false,
29
+ "lstrip": false,
30
+ "rstrip": false,
31
+ "normalized": false
32
+ },
33
+ {
34
+ "id": 3,
35
+ "special": true,
36
+ "content": "[SEP]",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": false
41
+ },
42
+ {
43
+ "id": 4,
44
+ "special": true,
45
+ "content": "[MASK]",
46
+ "single_word": false,
47
+ "lstrip": false,
48
+ "rstrip": false,
49
+ "normalized": false
50
+ }
51
+ ],
52
+ "normalizer": {
53
+ "type": "BertNormalizer",
54
+ "clean_text": true,
55
+ "handle_chinese_chars": true,
56
+ "strip_accents": true,
57
+ "lowercase": false
58
+ },
59
+ "pre_tokenizer": {
60
+ "type": "Sequence",
61
+ "pretokenizers": [
62
+ {
63
+ "type": "WhitespaceSplit"
64
+ },
65
+ {
66
+ "type": "Split",
67
+ "pattern": {
68
+ "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])"
69
+ },
70
+ "behavior": "Isolated",
71
+ "invert": false
72
+ }
73
+ ]
74
+ },
75
+ "post_processor": {
76
+ "type": "TemplateProcessing",
77
+ "single": [
78
+ {
79
+ "SpecialToken": {
80
+ "id": "[CLS]",
81
+ "type_id": 0
82
+ }
83
+ },
84
+ {
85
+ "Sequence": {
86
+ "id": "A",
87
+ "type_id": 0
88
+ }
89
+ },
90
+ {
91
+ "SpecialToken": {
92
+ "id": "[SEP]",
93
+ "type_id": 0
94
+ }
95
+ }
96
+ ],
97
+ "pair": [
98
+ {
99
+ "SpecialToken": {
100
+ "id": "[CLS]",
101
+ "type_id": 0
102
+ }
103
+ },
104
+ {
105
+ "Sequence": {
106
+ "id": "A",
107
+ "type_id": 0
108
+ }
109
+ },
110
+ {
111
+ "SpecialToken": {
112
+ "id": "[SEP]",
113
+ "type_id": 0
114
+ }
115
+ },
116
+ {
117
+ "Sequence": {
118
+ "id": "B",
119
+ "type_id": 1
120
+ }
121
+ },
122
+ {
123
+ "SpecialToken": {
124
+ "id": "[SEP]",
125
+ "type_id": 1
126
+ }
127
+ }
128
+ ],
129
+ "special_tokens": {
130
+ "[CLS]": {
131
+ "id": "[CLS]",
132
+ "ids": [
133
+ 2
134
+ ],
135
+ "tokens": [
136
+ "[CLS]"
137
+ ]
138
+ },
139
+ "[SEP]": {
140
+ "id": "[SEP]",
141
+ "ids": [
142
+ 3
143
+ ],
144
+ "tokens": [
145
+ "[SEP]"
146
+ ]
147
+ }
148
+ }
149
+ },
150
+ "decoder": {
151
+ "type": "WordPiece",
152
+ "prefix": "##",
153
+ "cleanup": true
154
+ },
155
+ "model": {
156
+ "type": "WordPiece",
157
+ "unk_token": "[UNK]",
158
+ "continuing_subword_prefix": "##",
159
+ "max_input_chars_per_word": 100,
160
+ "vocab": {
161
+ "[PAD]": 0,
162
+ "[UNK]": 1,
163
+ "[CLS]": 2,
164
+ "[SEP]": 3,
165
+ "[MASK]": 4,
166
+ "=": 5,
167
+ "F": 6,
168
+ "N": 7,
169
+ "1": 8,
170
+ "n": 9,
171
+ "O": 10,
172
+ "C": 11,
173
+ "S": 12,
174
+ "c": 13,
175
+ "3": 14,
176
+ "Cl": 15,
177
+ "o": 16,
178
+ "[nH]": 17,
179
+ "s": 18,
180
+ "-": 19,
181
+ "2": 20,
182
+ "4": 21,
183
+ "[C@H]": 22,
184
+ ")": 23,
185
+ "(": 24,
186
+ "Br": 25,
187
+ "#": 26,
188
+ "[C@@H]": 27,
189
+ "[C@]": 28,
190
+ "[C@@]": 29,
191
+ "[O-]": 30,
192
+ "[N+]": 31,
193
+ "[C]": 32,
194
+ "I": 33,
195
+ "5": 34,
196
+ "[C-]": 35,
197
+ "[CH]": 36,
198
+ "/": 37,
199
+ "[S@]": 38,
200
+ "[S@@]": 39,
201
+ "[SH]": 40,
202
+ "[Si]": 41,
203
+ "[n+]": 42,
204
+ "[C+]": 43,
205
+ "[S@H]": 44,
206
+ "[Fe]": 45,
207
+ "[S@@H]": 46,
208
+ "B": 47,
209
+ "[O]": 48,
210
+ "[S-]": 49,
211
+ "[P@H]": 50,
212
+ "[PH]": 51,
213
+ "6": 52,
214
+ "[O+]": 53,
215
+ "[P@@H]": 54,
216
+ "[NH+]": 55,
217
+ "[S]": 56,
218
+ "\\": 57,
219
+ "[P@]": 58,
220
+ "[P@@]": 59,
221
+ "[N]": 60,
222
+ "P": 61,
223
+ "[S+]": 62,
224
+ "[P]": 63,
225
+ "[IH]": 64,
226
+ "[se]": 65,
227
+ "[C@+]": 66,
228
+ "[Si@]": 67,
229
+ "[c-]": 68,
230
+ "7": 69,
231
+ "[C@-]": 70,
232
+ "[Si@@]": 71,
233
+ "[Se]": 72,
234
+ "[Si@H]": 73,
235
+ "[S@+]": 74,
236
+ "[N-]": 75,
237
+ "[CnH]": 76,
238
+ "[c+]": 77,
239
+ "[P+]": 78,
240
+ "[Si@@H]": 79,
241
+ "[SiH]": 80,
242
+ "[P-]": 81,
243
+ "[I]": 82,
244
+ "[S@-]": 83,
245
+ "[CH+]": 84,
246
+ "[C@H+]": 85,
247
+ "[I+]": 86,
248
+ "[C@@-]": 87,
249
+ "8": 88,
250
+ "[Si-]": 89,
251
+ "[C@@H+]": 90,
252
+ "[I-]": 91,
253
+ "[CH-]": 92,
254
+ "[P@+]": 93,
255
+ "[Cn]": 94,
256
+ "[C@@+]": 95,
257
+ "[SnH]": 96,
258
+ "[Se@]": 97,
259
+ "[S@@+]": 98,
260
+ "[Se@@]": 99,
261
+ "[Si+]": 100,
262
+ "9": 101,
263
+ "[InH]": 102,
264
+ "[Ce]": 103,
265
+ "[I@@]": 104,
266
+ "[P@-]": 105,
267
+ "[Se-]": 106,
268
+ "[Sc]": 107,
269
+ "[SH+]": 108,
270
+ "[I@@H]": 109,
271
+ "[I@H]": 110,
272
+ "[c]": 111,
273
+ "[SH-]": 112,
274
+ "[N@]": 113,
275
+ "[I@]": 114,
276
+ "[N@@]": 115,
277
+ "[P@@-]": 116,
278
+ "%10": 117,
279
+ "[nH+]": 118,
280
+ "[Ne]": 119,
281
+ "[Si@+]": 120,
282
+ "[Cl+]": 121,
283
+ "[Br+]": 122,
284
+ "[N@H+]": 123,
285
+ "[S@@-]": 124,
286
+ "[N@+]": 125,
287
+ "[n-]": 126,
288
+ "[NH2+]": 127,
289
+ "[B]": 128,
290
+ "[Cn+]": 129,
291
+ "[N@@H+]": 130,
292
+ "[s+]": 131,
293
+ "[si]": 132,
294
+ "[N@@+]": 133,
295
+ "p": 134,
296
+ "[P@@+]": 135,
297
+ "[Sc@H]": 136,
298
+ "[cH+]": 137,
299
+ "[ScH]": 138,
300
+ "[Fe+]": 139,
301
+ "[o+]": 140,
302
+ "[cH-]": 141,
303
+ "[NH]": 142,
304
+ "[Se+]": 143,
305
+ "[Fe-]": 144,
306
+ "[I@+]": 145,
307
+ "[Sc@]": 146,
308
+ "[B-]": 147,
309
+ "[SeH]": 148,
310
+ "[Se@+]": 149,
311
+ "[FeH]": 150,
312
+ "[Fe@@]": 151,
313
+ "[Cn-]": 152,
314
+ "[S@@H+]": 153,
315
+ "%11": 154,
316
+ "[S@H-]": 155,
317
+ "[S@@H-]": 156,
318
+ "[CH2-]": 157,
319
+ "[CnH-]": 158,
320
+ "[In]": 159,
321
+ "[S@H+]": 160,
322
+ "[pH]": 161,
323
+ "[PH+]": 162,
324
+ "[Fe@@H]": 163,
325
+ "[PH-]": 164,
326
+ "b": 165,
327
+ "[Si@-]": 166,
328
+ "[si-]": 167,
329
+ "[sH+]": 168,
330
+ "[Fe@H]": 169,
331
+ "[P@@H-]": 170,
332
+ "[Sn+]": 171,
333
+ "[P@@H+]": 172,
334
+ "[P@H+]": 173,
335
+ "[P@H-]": 174,
336
+ "[Se@H]": 175,
337
+ "[Se@-]": 176,
338
+ "[Sc@@H]": 177,
339
+ "[I@-]": 178,
340
+ "[Sn]": 179,
341
+ "[IH-]": 180,
342
+ "[IH+]": 181,
343
+ "[F+]": 182,
344
+ "[Fe@]": 183,
345
+ "[Sn@]": 184,
346
+ "[CnH+]": 185,
347
+ "[Si@H-]": 186,
348
+ "[NH-]": 187,
349
+ "[Si@H+]": 188,
350
+ "[Sc-]": 189,
351
+ "[Se@@H]": 190,
352
+ "[Sn-]": 191,
353
+ "[se+]": 192,
354
+ "[Si@@+]": 193,
355
+ "[SiH+]": 194,
356
+ "[PH2-]": 195,
357
+ "[CeH]": 196,
358
+ "[SH3]": 197,
359
+ "[CH2]": 198,
360
+ "[Sc+]": 199,
361
+ "[Si@@-]": 200
362
+ }
363
+ }
364
+ }
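
The ligand tokenizer above is a SMILES tokenizer: a BERT-style normalizer (no lowercasing), a whitespace split followed by a regex that isolates bracket atoms, two-letter elements such as Cl/Br, bonds, branches and ring-closure digits, and a WordPiece model over the 201-token vocabulary. A minimal sketch of exercising it with the `tokenizers` library (the local path is illustrative):

```python
from tokenizers import Tokenizer

# Load the ligand (SMILES) tokenizer added in this commit; adjust the path to your checkout
tok = Tokenizer.from_file("0_Asym/140438116837408_Transformer/tokenizer.json")

# Aspirin: the regex pre-tokenizer keeps aromatic atoms, branches and ring digits as single tokens
enc = tok.encode("CC(=O)Oc1ccccc1C(=O)O")
print(enc.tokens)   # e.g. ['[CLS]', 'C', 'C', '(', '=', 'O', ')', 'O', 'c', '1', ..., '[SEP]']
print(enc.ids)
```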
0_Asym/140438116837408_Transformer/tokenizer_config.json ADDED
@@ -0,0 +1 @@
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": true, "model_max_length": 512, "name_or_path": "/netdisk/xvg/smiles_tokenizer", "model_type": "bert", "special_tokens_map_file": "/home/xvg/affinity_pred/train/tokenizer_regex/tokenizer/special_tokens_map.json", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
0_Asym/140438116837408_Transformer/vocab.txt ADDED
@@ -0,0 +1,201 @@
1
+ [PAD]
2
+ [UNK]
3
+ [CLS]
4
+ [SEP]
5
+ [MASK]
6
+ =
7
+ F
8
+ N
9
+ 1
10
+ n
11
+ O
12
+ C
13
+ S
14
+ c
15
+ 3
16
+ Cl
17
+ o
18
+ [nH]
19
+ s
20
+ -
21
+ 2
22
+ 4
23
+ [C@H]
24
+ )
25
+ (
26
+ Br
27
+ #
28
+ [C@@H]
29
+ [C@]
30
+ [C@@]
31
+ [O-]
32
+ [N+]
33
+ [C]
34
+ I
35
+ 5
36
+ [C-]
37
+ [CH]
38
+ /
39
+ [S@]
40
+ [S@@]
41
+ [SH]
42
+ [Si]
43
+ [n+]
44
+ [C+]
45
+ [S@H]
46
+ [Fe]
47
+ [S@@H]
48
+ B
49
+ [O]
50
+ [S-]
51
+ [P@H]
52
+ [PH]
53
+ 6
54
+ [O+]
55
+ [P@@H]
56
+ [NH+]
57
+ [S]
58
+ \
59
+ [P@]
60
+ [P@@]
61
+ [N]
62
+ P
63
+ [S+]
64
+ [P]
65
+ [IH]
66
+ [se]
67
+ [C@+]
68
+ [Si@]
69
+ [c-]
70
+ 7
71
+ [C@-]
72
+ [Si@@]
73
+ [Se]
74
+ [Si@H]
75
+ [S@+]
76
+ [N-]
77
+ [CnH]
78
+ [c+]
79
+ [P+]
80
+ [Si@@H]
81
+ [SiH]
82
+ [P-]
83
+ [I]
84
+ [S@-]
85
+ [CH+]
86
+ [C@H+]
87
+ [I+]
88
+ [C@@-]
89
+ 8
90
+ [Si-]
91
+ [C@@H+]
92
+ [I-]
93
+ [CH-]
94
+ [P@+]
95
+ [Cn]
96
+ [C@@+]
97
+ [SnH]
98
+ [Se@]
99
+ [S@@+]
100
+ [Se@@]
101
+ [Si+]
102
+ 9
103
+ [InH]
104
+ [Ce]
105
+ [I@@]
106
+ [P@-]
107
+ [Se-]
108
+ [Sc]
109
+ [SH+]
110
+ [I@@H]
111
+ [I@H]
112
+ [c]
113
+ [SH-]
114
+ [N@]
115
+ [I@]
116
+ [N@@]
117
+ [P@@-]
118
+ %10
119
+ [nH+]
120
+ [Ne]
121
+ [Si@+]
122
+ [Cl+]
123
+ [Br+]
124
+ [N@H+]
125
+ [S@@-]
126
+ [N@+]
127
+ [n-]
128
+ [NH2+]
129
+ [B]
130
+ [Cn+]
131
+ [N@@H+]
132
+ [s+]
133
+ [si]
134
+ [N@@+]
135
+ p
136
+ [P@@+]
137
+ [Sc@H]
138
+ [cH+]
139
+ [ScH]
140
+ [Fe+]
141
+ [o+]
142
+ [cH-]
143
+ [NH]
144
+ [Se+]
145
+ [Fe-]
146
+ [I@+]
147
+ [Sc@]
148
+ [B-]
149
+ [SeH]
150
+ [Se@+]
151
+ [FeH]
152
+ [Fe@@]
153
+ [Cn-]
154
+ [S@@H+]
155
+ %11
156
+ [S@H-]
157
+ [S@@H-]
158
+ [CH2-]
159
+ [CnH-]
160
+ [In]
161
+ [S@H+]
162
+ [pH]
163
+ [PH+]
164
+ [Fe@@H]
165
+ [PH-]
166
+ b
167
+ [Si@-]
168
+ [si-]
169
+ [sH+]
170
+ [Fe@H]
171
+ [P@@H-]
172
+ [Sn+]
173
+ [P@@H+]
174
+ [P@H+]
175
+ [P@H-]
176
+ [Se@H]
177
+ [Se@-]
178
+ [Sc@@H]
179
+ [I@-]
180
+ [Sn]
181
+ [IH-]
182
+ [IH+]
183
+ [F+]
184
+ [Fe@]
185
+ [Sn@]
186
+ [CnH+]
187
+ [Si@H-]
188
+ [NH-]
189
+ [Si@H+]
190
+ [Sc-]
191
+ [Se@@H]
192
+ [Sn-]
193
+ [se+]
194
+ [Si@@+]
195
+ [SiH+]
196
+ [PH2-]
197
+ [CeH]
198
+ [SH3]
199
+ [CH2]
200
+ [Sc+]
201
+ [Si@@-]
0_Asym/140442693121648_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
0_Asym/140442693122032_Dense/config.json ADDED
@@ -0,0 +1 @@
1
+ {"in_features": 1024, "out_features": 1024, "bias": true, "activation_function": "torch.nn.modules.activation.Tanh"}
0_Asym/140442693122032_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f0f8caa874c8a31f63f88292a5a802bd58590a6d116ad13f4b9d1f8d64b1e3
3
+ size 4199463
0_Asym/140442693122080_Pooling/config.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": true,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false
7
+ }
0_Asym/140442694969664_Dense/config.json ADDED
@@ -0,0 +1 @@
1
+ {"in_features": 768, "out_features": 768, "bias": true, "activation_function": "torch.nn.modules.activation.Tanh"}
0_Asym/140442694969664_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90dd4b612a2a258b8b3ff0dc77a5cf3b852c305aa7783dfa8a7e8d4804282be2
3
+ size 2363431
0_Asym/140442697524080_Transformer/config.json ADDED
@@ -0,0 +1,26 @@
1
+ {
2
+ "_name_or_path": "/netdisk/xvg/seq_3",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0,
10
+ "hidden_size": 1024,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4096,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 40000,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 16,
17
+ "num_hidden_layers": 30,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "tokenizer_class": "BertTokenizerFast",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.17.0.dev0",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30
26
+ }
0_Asym/140442697524080_Transformer/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99210b2606357dacdde59557ffe899a5c390ae7f9a28ecd4f5383a096f7fc88b
3
+ size 1680230449
0_Asym/140442697524080_Transformer/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "max_seq_length": 2048,
3
+ "do_lower_case": false
4
+ }
0_Asym/140442697524080_Transformer/special_tokens_map.json ADDED
@@ -0,0 +1 @@
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
0_Asym/140442697524080_Transformer/tokenizer.json ADDED
@@ -0,0 +1,197 @@
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "special": true,
9
+ "content": "[PAD]",
10
+ "single_word": false,
11
+ "lstrip": false,
12
+ "rstrip": false,
13
+ "normalized": false
14
+ },
15
+ {
16
+ "id": 1,
17
+ "special": true,
18
+ "content": "[UNK]",
19
+ "single_word": false,
20
+ "lstrip": false,
21
+ "rstrip": false,
22
+ "normalized": false
23
+ },
24
+ {
25
+ "id": 2,
26
+ "special": true,
27
+ "content": "[CLS]",
28
+ "single_word": false,
29
+ "lstrip": false,
30
+ "rstrip": false,
31
+ "normalized": false
32
+ },
33
+ {
34
+ "id": 3,
35
+ "special": true,
36
+ "content": "[SEP]",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": false
41
+ },
42
+ {
43
+ "id": 4,
44
+ "special": true,
45
+ "content": "[MASK]",
46
+ "single_word": false,
47
+ "lstrip": false,
48
+ "rstrip": false,
49
+ "normalized": false
50
+ }
51
+ ],
52
+ "normalizer": {
53
+ "type": "Sequence",
54
+ "normalizers": [
55
+ {
56
+ "type": "Replace",
57
+ "pattern": {
58
+ "Regex": "[UZOB]"
59
+ },
60
+ "content": "X"
61
+ },
62
+ {
63
+ "type": "Replace",
64
+ "pattern": {
65
+ "Regex": "\\s"
66
+ },
67
+ "content": ""
68
+ }
69
+ ]
70
+ },
71
+ "pre_tokenizer": {
72
+ "type": "Split",
73
+ "pattern": {
74
+ "Regex": ""
75
+ },
76
+ "behavior": "Isolated",
77
+ "invert": false
78
+ },
79
+ "post_processor": {
80
+ "type": "TemplateProcessing",
81
+ "single": [
82
+ {
83
+ "SpecialToken": {
84
+ "id": "[CLS]",
85
+ "type_id": 0
86
+ }
87
+ },
88
+ {
89
+ "Sequence": {
90
+ "id": "A",
91
+ "type_id": 0
92
+ }
93
+ },
94
+ {
95
+ "SpecialToken": {
96
+ "id": "[SEP]",
97
+ "type_id": 0
98
+ }
99
+ }
100
+ ],
101
+ "pair": [
102
+ {
103
+ "SpecialToken": {
104
+ "id": "[CLS]",
105
+ "type_id": 0
106
+ }
107
+ },
108
+ {
109
+ "Sequence": {
110
+ "id": "A",
111
+ "type_id": 0
112
+ }
113
+ },
114
+ {
115
+ "SpecialToken": {
116
+ "id": "[SEP]",
117
+ "type_id": 0
118
+ }
119
+ },
120
+ {
121
+ "Sequence": {
122
+ "id": "B",
123
+ "type_id": 1
124
+ }
125
+ },
126
+ {
127
+ "SpecialToken": {
128
+ "id": "[SEP]",
129
+ "type_id": 1
130
+ }
131
+ }
132
+ ],
133
+ "special_tokens": {
134
+ "[CLS]": {
135
+ "id": "[CLS]",
136
+ "ids": [
137
+ 2
138
+ ],
139
+ "tokens": [
140
+ "[CLS]"
141
+ ]
142
+ },
143
+ "[SEP]": {
144
+ "id": "[SEP]",
145
+ "ids": [
146
+ 3
147
+ ],
148
+ "tokens": [
149
+ "[SEP]"
150
+ ]
151
+ }
152
+ }
153
+ },
154
+ "decoder": {
155
+ "type": "WordPiece",
156
+ "prefix": "##",
157
+ "cleanup": true
158
+ },
159
+ "model": {
160
+ "type": "WordPiece",
161
+ "unk_token": "[UNK]",
162
+ "continuing_subword_prefix": "##",
163
+ "max_input_chars_per_word": 100,
164
+ "vocab": {
165
+ "[PAD]": 0,
166
+ "[UNK]": 1,
167
+ "[CLS]": 2,
168
+ "[SEP]": 3,
169
+ "[MASK]": 4,
170
+ "L": 5,
171
+ "A": 6,
172
+ "G": 7,
173
+ "V": 8,
174
+ "E": 9,
175
+ "S": 10,
176
+ "I": 11,
177
+ "K": 12,
178
+ "R": 13,
179
+ "D": 14,
180
+ "T": 15,
181
+ "P": 16,
182
+ "N": 17,
183
+ "Q": 18,
184
+ "F": 19,
185
+ "Y": 20,
186
+ "M": 21,
187
+ "H": 22,
188
+ "C": 23,
189
+ "W": 24,
190
+ "X": 25,
191
+ "U": 26,
192
+ "B": 27,
193
+ "Z": 28,
194
+ "O": 29
195
+ }
196
+ }
197
+ }
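
The protein tokenizer above follows the ProtBert convention: ambiguous or rare residues U, Z, O and B are replaced by X, whitespace is stripped, and the sequence is split into single residues before lookup in the 30-token amino-acid vocabulary. A minimal sketch, again with the `tokenizers` library (path illustrative):

```python
from tokenizers import Tokenizer

# Load the protein-sequence tokenizer added in this commit; adjust the path to your checkout
tok = Tokenizer.from_file("0_Asym/140442697524080_Transformer/tokenizer.json")

# Rare residues are normalized to X, whitespace is removed, and each residue becomes one token
enc = tok.encode("MKT UZO B")
print(enc.tokens)   # expected (assuming per-character splitting): ['[CLS]', 'M', 'K', 'T', 'X', 'X', 'X', 'X', '[SEP]']
```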
0_Asym/140442697524080_Transformer/tokenizer_config.json ADDED
@@ -0,0 +1 @@
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "full_tokenizer_file": null, "name_or_path": "/netdisk/xvg/seq_tokenizer", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
0_Asym/140442697524080_Transformer/vocab.txt ADDED
@@ -0,0 +1,30 @@
1
+ [PAD]
2
+ [UNK]
3
+ [CLS]
4
+ [SEP]
5
+ [MASK]
6
+ L
7
+ A
8
+ G
9
+ V
10
+ E
11
+ S
12
+ I
13
+ K
14
+ R
15
+ D
16
+ T
17
+ P
18
+ N
19
+ Q
20
+ F
21
+ Y
22
+ M
23
+ H
24
+ C
25
+ W
26
+ X
27
+ U
28
+ B
29
+ Z
30
+ O
0_Asym/config.json ADDED
@@ -0,0 +1,25 @@
1
+ {
2
+ "types": {
3
+ "140442697524080_Transformer": "sentence_transformers.models.Transformer",
4
+ "140442693122080_Pooling": "sentence_transformers.models.Pooling",
5
+ "140442693122032_Dense": "sentence_transformers.models.Dense",
6
+ "140438116837408_Transformer": "sentence_transformers.models.Transformer",
7
+ "140442693121648_Pooling": "sentence_transformers.models.Pooling",
8
+ "140442694969664_Dense": "sentence_transformers.models.Dense"
9
+ },
10
+ "structure": {
11
+ "protein": [
12
+ "140442697524080_Transformer",
13
+ "140442693122080_Pooling",
14
+ "140442693122032_Dense"
15
+ ],
16
+ "ligand": [
17
+ "140438116837408_Transformer",
18
+ "140442693121648_Pooling",
19
+ "140442694969664_Dense"
20
+ ]
21
+ },
22
+ "parameters": {
23
+ "allow_empty_key": true
24
+ }
25
+ }
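
`0_Asym/config.json` above wires two independent encoder stacks behind `sentence_transformers.models.Asym`: a `protein` branch (the 1024-dim, 30-layer BERT, CLS pooling, 1024→1024 Tanh Dense) and a `ligand` branch (the 768-dim, 12-layer BERT, CLS pooling, 768→768 Tanh Dense). A hedged sketch of how an equivalent module could be assembled with the sentence-transformers API (checkpoint paths are placeholders, not the ones used to train this model):

```python
import torch.nn as nn
from sentence_transformers import models

# Placeholder checkpoints standing in for the protein and SMILES encoders in this commit
protein_encoder = models.Transformer("path/to/protein_bert", max_seq_length=2048)
ligand_encoder = models.Transformer("path/to/smiles_bert", max_seq_length=512)

# Asym routes each input to the sub-module stack named by its dictionary key
asym = models.Asym(
    {
        "protein": [
            protein_encoder,
            models.Pooling(1024, pooling_mode_cls_token=True, pooling_mode_mean_tokens=False),
            models.Dense(1024, 1024, activation_function=nn.Tanh()),
        ],
        "ligand": [
            ligand_encoder,
            models.Pooling(768, pooling_mode_cls_token=True, pooling_mode_mean_tokens=False),
            models.Dense(768, 768, activation_function=nn.Tanh()),
        ],
    },
    allow_empty_key=True,
)
```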
1_Dense/config.json ADDED
@@ -0,0 +1 @@
1
+ {"in_features": 1792, "out_features": 1000, "bias": true, "activation_function": "torch.nn.modules.activation.GELU"}
1_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:422ed1090145e47572fe91448f0fdd8856a15d41aaeb122428ce40afc1d2bdc5
3
+ size 7173031
2_Dense/config.json ADDED
@@ -0,0 +1 @@
1
+ {"in_features": 1000, "out_features": 1000, "bias": true, "activation_function": "torch.nn.modules.activation.GELU"}
2_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:876ca9fd237666e79fd3b37bf2eba4e6065c61739664dcfcfd83b636eca92e2a
3
+ size 4005031
3_Dense/config.json ADDED
@@ -0,0 +1 @@
1
+ {"in_features": 1000, "out_features": 1000, "bias": true, "activation_function": "torch.nn.modules.activation.GELU"}
3_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90907c8faabb7c8794fa1860163abd69ef4a859462aa3c764bd18ebfbc69d4d1
3
+ size 4005031
4_Dense/config.json ADDED
@@ -0,0 +1 @@
1
+ {"in_features": 1000, "out_features": 1, "bias": true, "activation_function": "torch.nn.modules.linear.Identity"}
4_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e5e815c48ff878ee203df55eee59079291a00cc2d8fc2eeb5d5f58a570b6613
3
+ size 5095
5_Dense/config.json ADDED
@@ -0,0 +1 @@
1
+ {"in_features": 1, "out_features": 1, "bias": true, "activation_function": "torch.nn.modules.linear.Identity"}
5_Dense/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3c35313827b4d38e21bbf665cd5b9e2b261d24f4d9eafb31fe19a3b7f089c36
3
+ size 1127
README.md ADDED
@@ -0,0 +1,65 @@
1
+ ---
2
+ pipeline_tag: sentence-similarity
3
+ tags:
4
+ - sentence-transformers
5
+ - feature-extraction
6
+ - sentence-similarity
7
+ ---
8
+
9
+ # jglaser/protein-ligand-mlp-3
10
+
11
+ This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 1-dimensional dense vector space and can be used for tasks like clustering or semantic search.
12
+
13
+ <!--- Describe your model here -->
14
+
15
+ ## Usage (Sentence-Transformers)
16
+
17
+ Using this model is straightforward once you have [sentence-transformers](https://www.SBERT.net) installed:
18
+
19
+ ```
20
+ pip install -U sentence-transformers
21
+ ```
22
+
23
+ Then you can use the model like this:
24
+
25
+ ```python
26
+ from sentence_transformers import SentenceTransformer
27
+ sentences = ["This is an example sentence", "Each sentence is converted"]
28
+
29
+ model = SentenceTransformer('jglaser/protein-ligand-mlp-3')
30
+ embeddings = model.encode(sentences)
31
+ print(embeddings)
32
+ ```
33
+
34
+
35
+
36
+ ## Evaluation Results
37
+
38
+ <!--- Describe how your model was evaluated -->
39
+
40
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=jglaser/protein-ligand-mlp-3)
41
+
42
+
43
+
44
+ ## Full Model Architecture
45
+ ```
46
+ SentenceTransformer(
47
+ (0): Asym(
48
+ (protein-0): Transformer({'max_seq_length': 2048, 'do_lower_case': False}) with Transformer model: BertModel
49
+ (protein-1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
50
+ (protein-2): Dense({'in_features': 1024, 'out_features': 1024, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
51
+ (ligand-0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
52
+ (ligand-1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
53
+ (ligand-2): Dense({'in_features': 768, 'out_features': 768, 'bias': True, 'activation_function': 'torch.nn.modules.activation.Tanh'})
54
+ )
55
+ (1): Dense({'in_features': 1792, 'out_features': 1000, 'bias': True, 'activation_function': 'torch.nn.modules.activation.GELU'})
56
+ (2): Dense({'in_features': 1000, 'out_features': 1000, 'bias': True, 'activation_function': 'torch.nn.modules.activation.GELU'})
57
+ (3): Dense({'in_features': 1000, 'out_features': 1000, 'bias': True, 'activation_function': 'torch.nn.modules.activation.GELU'})
58
+ (4): Dense({'in_features': 1000, 'out_features': 1, 'bias': True, 'activation_function': 'torch.nn.modules.linear.Identity'})
59
+ (5): Dense({'in_features': 1, 'out_features': 1, 'bias': True, 'activation_function': 'torch.nn.modules.linear.Identity'})
60
+ )
61
+ ```
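+
+ Reading the dimensions off the module configs: the protein branch emits a 1024-dim embedding and the ligand branch a 768-dim embedding; the first Dense layer expects 1024 + 768 = 1792 inputs, so the two embeddings are evidently concatenated before the 1792 → 1000 → 1000 → 1000 → 1 → 1 MLP head that produces the single scalar output (presumably a binding-affinity prediction, given the model name).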
62
+
63
+ ## Citing & Authors
64
+
65
+ <!--- Describe where people can find more information -->
config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.2.0",
4
+ "transformers": "4.17.0.dev0",
5
+ "pytorch": "1.10.2"
6
+ }
7
+ }
modules.json ADDED
@@ -0,0 +1,38 @@
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "0_Asym",
6
+ "type": "sentence_transformers.models.Asym"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Dense",
12
+ "type": "sentence_transformers.models.Dense"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Dense",
18
+ "type": "sentence_transformers.models.Dense"
19
+ },
20
+ {
21
+ "idx": 3,
22
+ "name": "3",
23
+ "path": "3_Dense",
24
+ "type": "sentence_transformers.models.Dense"
25
+ },
26
+ {
27
+ "idx": 4,
28
+ "name": "4",
29
+ "path": "4_Dense",
30
+ "type": "sentence_transformers.models.Dense"
31
+ },
32
+ {
33
+ "idx": 5,
34
+ "name": "5",
35
+ "path": "5_Dense",
36
+ "type": "sentence_transformers.models.Dense"
37
+ }
38
+ ]