Feature Extraction
sentence-transformers
ONNX
English
sentence-similarity
Inference Endpoints
shuttie committed
Commit c169edd
0 Parent(s):

initial commit

Files changed (7)
  1. .gitattributes +2 -0
  2. .gitignore +1 -0
  3. README.md +66 -0
  4. convert.py +18 -0
  5. pytorch_model.onnx +3 -0
  6. requirements.txt +4 -0
  7. vocab.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,2 @@
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ vocab.txt filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+ venv
README.md ADDED
@@ -0,0 +1,66 @@
+ ---
+ tags:
+ - sentence-transformers
+ - feature-extraction
+ - sentence-similarity
+ language: en
+ license: apache-2.0
+ datasets:
+ - s2orc
+ - flax-sentence-embeddings/stackexchange_xml
+ - ms_marco
+ - gooaq
+ - yahoo_answers_topics
+ - code_search_net
+ - search_qa
+ - eli5
+ - snli
+ - multi_nli
+ - wikihow
+ - natural_questions
+ - trivia_qa
+ - embedding-data/sentence-compression
+ - embedding-data/flickr30k-captions
+ - embedding-data/altlex
+ - embedding-data/simple-wiki
+ - embedding-data/QQP
+ - embedding-data/SPECTER
+ - embedding-data/PAQ_pairs
+ - embedding-data/WikiAnswers
+
+ ---
+
+ # ONNX version of sentence-transformers/all-mpnet-base-v2
+
+ This is a sentence-transformers model: it maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for tasks like clustering or semantic search. This ONNX version of the model is made for the [Metarank](https://github.com/metarank/metarank) re-ranker to compute semantic similarity.
+
+ Check out the [main Metarank docs](https://docs.metarank.ai) for how to configure it.
+
+ TLDR:
+ ```yaml
+ - type: field_match
+   name: title_query_match
+   rankingField: ranking.query
+   itemField: item.title
+   distance: cos
+   method:
+     type: bert
+     model: metarank/all-mpnet-base-v2
+ ```
+
+ ## Building the model
+
+ ```shell
+ $> pip install -r requirements.txt
+ $> python convert.py
+
+ ============= Diagnostic Run torch.onnx.export version 2.0.0+cu117 =============
+ verbose: False, log level: Level.ERROR
+ ======================= 0 NONE 0 NOTE 0 WARNING 0 ERROR ========================
+
+ ```
+
+ ## License
+
+ Apache 2.0
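The README does not show how to call the exported graph outside of Metarank. As a quick sanity check, the model can be run with `onnxruntime` (not pinned in requirements.txt, so treat it as an extra dependency here) together with the original tokenizer; the sketch below mean-pools the `last_hidden_state` output into a single 768-dimensional sentence vector. Any graph input the tokenizer does not produce is filled with zeros, which is enough to verify shapes but is not a reference inference path.

```python
# Minimal sanity check of pytorch_model.onnx; assumes onnxruntime is installed separately.
import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")
session = ort.InferenceSession("pytorch_model.onnx", providers=["CPUExecutionProvider"])

encoded = tokenizer("how to export a transformer to onnx", return_tensors="np")

# Feed every input declared by the graph; fill anything the tokenizer does not
# emit (e.g. token_type_ids) with zeros of the same shape as input_ids.
feed = {}
for graph_input in session.get_inputs():
    if graph_input.name in encoded:
        feed[graph_input.name] = encoded[graph_input.name].astype(np.int64)
    else:
        feed[graph_input.name] = np.zeros_like(encoded["input_ids"], dtype=np.int64)

last_hidden_state = session.run(None, feed)[0]  # shape: (batch, seq_len, 768)

# Mean pooling over non-padding tokens, the same pooling sentence-transformers uses for this model.
mask = encoded["attention_mask"][..., None].astype(np.float32)
embedding = (last_hidden_state * mask).sum(axis=1) / mask.sum(axis=1)
print(embedding.shape)  # expected: (1, 768)
```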
convert.py ADDED
@@ -0,0 +1,18 @@
+ from transformers import AutoTokenizer, AutoModel
+ import torch
+
+ max_seq_length = 128
+
+ # Load the original PyTorch model and put it into inference mode
+ model = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
+ model.eval()
+
+ # Dummy tensors used only to trace the graph during export
+ inputs = {"input_ids": torch.ones(1, max_seq_length, dtype=torch.int64),
+           "attention_mask": torch.ones(1, max_seq_length, dtype=torch.int64),
+           "token_type_ids": torch.ones(1, max_seq_length, dtype=torch.int64)}
+
+ # Mark batch and sequence dimensions as dynamic so the exported graph accepts arbitrary shapes
+ symbolic_names = {0: 'batch_size', 1: 'max_seq_len'}
+
+ torch.onnx.export(model, args=tuple(inputs.values()), f='pytorch_model.onnx', export_params=True,
+                   input_names=['input_ids', 'attention_mask', 'token_type_ids'], output_names=['last_hidden_state'],
+                   dynamic_axes={'input_ids': symbolic_names, 'attention_mask': symbolic_names, 'token_type_ids': symbolic_names})
+
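Since requirements.txt already pins the `onnx` package, the exported file can be validated structurally right after convert.py finishes. A minimal sketch, assuming the default output name `pytorch_model.onnx` used above:

```python
# Structural validation of the exported graph using the onnx package from requirements.txt.
import onnx

onnx_model = onnx.load("pytorch_model.onnx")
onnx.checker.check_model(onnx_model)  # raises if the graph is malformed

# The declared interface should line up with the input_names/output_names
# passed to torch.onnx.export in convert.py.
print([i.name for i in onnx_model.graph.input])
print([o.name for o in onnx_model.graph.output])
```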
pytorch_model.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b994d56413c0ab6d5e9072cc5180cbcf277b2e7f2d897959d762b9579963f7b9
+ size 438198451
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ sentence-transformers==2.2.2
+ torch==2.0.0
+ onnx==1.13.1
+ huggingface_hub==0.13.3
vocab.txt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:07eced375cec144d27c900241f3e339478dec958f92fddbc551f295c992038a3
+ size 231508