numBery committed on
Commit
bd33eb3
1 Parent(s): c721a40

Add new SentenceTransformer model.

Browse files
Files changed (5)
  1. .gitattributes +2 -0
  2. README.md +3 -3
  3. config.json +4 -2
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +1 -1
.gitattributes CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
29
+ .git/lfs/objects/c3/a8/c3a85f238711653950f6a79ece63eb0ea93d76f6a6284be04019c53733baf256 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -24,7 +24,7 @@ Then you can use the model like this:
24
  from sentence_transformers import SentenceTransformer
25
  sentences = ["This is an example sentence", "Each sentence is converted"]
26
 
27
- model = SentenceTransformer('valurank/MiniLM-L6-Keyword-Extraction')
28
  embeddings = model.encode(sentences)
29
  print(embeddings)
30
  ```
@@ -48,8 +48,8 @@ def mean_pooling(model_output, attention_mask):
48
  sentences = ['This is an example sentence', 'Each sentence is converted']
49
 
50
  # Load model from HuggingFace Hub
51
- tokenizer = AutoTokenizer.from_pretrained('valurank/MiniLM-L6-Keyword-Extraction')
52
- model = AutoModel.from_pretrained('valurank/MiniLM-L6-Keyword-Extraction')
53
 
54
  # Tokenize sentences
55
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
24
  from sentence_transformers import SentenceTransformer
25
  sentences = ["This is an example sentence", "Each sentence is converted"]
26
 
27
+ model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
28
  embeddings = model.encode(sentences)
29
  print(embeddings)
30
  ```
48
  sentences = ['This is an example sentence', 'Each sentence is converted']
49
 
50
  # Load model from HuggingFace Hub
51
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
52
+ model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
53
 
54
  # Tokenize sentences
55
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "_name_or_path": "nreimers/MiniLM-L6-H384-uncased",
3
  "architectures": [
4
  "BertModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
 
7
  "gradient_checkpointing": false,
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
@@ -17,7 +18,8 @@
17
  "num_hidden_layers": 6,
18
  "pad_token_id": 0,
19
  "position_embedding_type": "absolute",
20
- "transformers_version": "4.8.2",
 
21
  "type_vocab_size": 2,
22
  "use_cache": true,
23
  "vocab_size": 30522
1
  {
2
+ "_name_or_path": "C:\\Users\\nikhi/.cache\\torch\\sentence_transformers\\sentence-transformers_all-MiniLM-L6-v2\\",
3
  "architectures": [
4
  "BertModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
  "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
18
  "num_hidden_layers": 6,
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.18.0",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "name_or_path": "nreimers/MiniLM-L6-H384-uncased", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer", "model_max_length": 512}
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "name_or_path": "C:\\Users\\nikhi/.cache\\torch\\sentence_transformers\\sentence-transformers_all-MiniLM-L6-v2\\", "do_basic_tokenize": true, "never_split": null, "model_max_length": 512, "special_tokens_map_file": "C:\\Users\\nikhi/.cache\\torch\\sentence_transformers\\sentence-transformers_all-MiniLM-L6-v2\\special_tokens_map.json", "tokenizer_class": "BertTokenizer"}