michaelfeil committed on
Commit
fd3c607
1 Parent(s): c2c1096

Upload intfloat/e5-large-v2 ctranslate fp16 weights

Browse files
Files changed (4) hide show
  1. README.md +7 -11
  2. model.bin +2 -2
  3. modules.json +20 -0
  4. sentence_bert_config.json +4 -0
README.md CHANGED
@@ -2608,20 +2608,16 @@ Speedup inference while reducing memory by 2x-4x using int8 inference in C++ on
2608
 
2609
  quantized version of [intfloat/e5-large-v2](https://huggingface.co/intfloat/e5-large-v2)
2610
  ```bash
2611
- pip install hf-hub-ctranslate2>=2.10.0 ctranslate2>=3.16.0
2612
  ```
2613
 
2614
  ```python
2615
  # from transformers import AutoTokenizer
2616
  model_name = "michaelfeil/ct2fast-e5-large-v2"
2617
 
2618
- from hf_hub_ctranslate2 import EncoderCT2fromHfHub
2619
- model = EncoderCT2fromHfHub(
2620
- # load in int8 on CUDA
2621
- model_name_or_path=model_name,
2622
- device="cuda",
2623
- compute_type="float16",
2624
- # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
2625
  )
2626
  embeddings = model.encode(
2627
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
@@ -2635,13 +2631,13 @@ scores = (embeddings @ embeddings.T) * 100
2635
  ```
2636
 
2637
  Checkpoint compatible with [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
2638
- and [hf-hub-ctranslate2>=2.10.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
2639
  - `compute_type=int8_float16` for `device="cuda"`
2640
  - `compute_type=int8` for `device="cpu"`
2641
 
2642
- Converted on 2023-06-16 using
2643
  ```
2644
- ct2-transformers-converter --model intfloat/e5-large-v2 --output_dir ~/tmp-ct2fast-e5-large-v2 --force --copy_files tokenizer.json README.md tokenizer_config.json vocab.txt special_tokens_map.json .gitattributes --quantization float16 --trust_remote_code
2645
  ```
2646
 
2647
  # Licence and other remarks:
 
2608
 
2609
  quantized version of [intfloat/e5-large-v2](https://huggingface.co/intfloat/e5-large-v2)
2610
  ```bash
2611
+ pip install "hf-hub-ctranslate2>=3.0.0" "ctranslate2>=3.16.0"
2612
  ```
2613
 
2614
  ```python
2615
  # from transformers import AutoTokenizer
2616
  model_name = "michaelfeil/ct2fast-e5-large-v2"
2617
 
2618
+ from hf_hub_ctranslate2 import CT2SentenceTransformer
2619
+ model = CT2SentenceTransformer(
2620
+ model_name, compute_type="int8_float16", device="cuda"
 
 
 
 
2621
  )
2622
  embeddings = model.encode(
2623
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
 
2631
  ```
2632
 
2633
  Checkpoint compatible with [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
2634
+ and [hf-hub-ctranslate2>=3.0.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
2635
  - `compute_type=int8_float16` for `device="cuda"`
2636
  - `compute_type=int8` for `device="cpu"`
2637
 
2638
+ Converted on 2023-06-18 using
2639
  ```
2640
+ ct2-transformers-converter --model intfloat/e5-large-v2 --output_dir ~/tmp-ct2fast-e5-large-v2 --force --copy_files tokenizer.json sentence_bert_config.json README.md modules.json special_tokens_map.json vocab.txt tokenizer_config.json .gitattributes --trust_remote_code
2641
  ```
2642
 
2643
  # Licence and other remarks:
model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:074f41ce6dbf6564f709b9bfa09f88894c3b05c85f0eeb515bea9c2b72a1c67f
3
- size 670300108
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a2e972c674871a0be45c33e92a898e4b04256a882fdf6e6a72a2629facaea59
3
+ size 1340583884
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }