Sentence Similarity
sentence-transformers
English
bert
ctranslate2
int8
float16
feature-extraction
Inference Endpoints
text-embeddings-inference
5 papers
michaelfeil committed on
Commit
364545a
1 Parent(s): a06b8c9

Upload sentence-transformers/all-MiniLM-L6-v2 ctranslate fp16 weights

Browse files
Files changed (1) hide show
  1. README.md +5 -6
README.md CHANGED
@@ -44,31 +44,30 @@ pip install hf-hub-ctranslate2>=2.11.0 ctranslate2>=3.16.0
44
  ```python
45
  # from transformers import AutoTokenizer
46
  model_name = "michaelfeil/ct2fast-all-MiniLM-L6-v2"
47
- model_name_orig=sentence-transformers/all-MiniLM-L6-v2
48
 
49
  from hf_hub_ctranslate2 import EncoderCT2fromHfHub
50
  model = EncoderCT2fromHfHub(
51
  # load in int8 on CUDA
52
  model_name_or_path=model_name,
53
  device="cuda",
54
- compute_type="int8_float16",
55
  )
56
  outputs = model.generate(
57
  text=["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
58
  max_length=64,
59
- )
60
- # perform downstream tasks on outputs
61
  outputs["pooler_output"]
62
  outputs["last_hidden_state"]
63
  outputs["attention_mask"]
64
 
65
  # alternative, use SentenceTransformer Mix-In
66
  # for end-to-end Sentence embeddings generation
67
- # not pulling from this repo
68
 
69
  from hf_hub_ctranslate2 import CT2SentenceTransformer
70
  model = CT2SentenceTransformer(
71
- model_name_orig, compute_type="int8_float16", device="cuda",
72
  )
73
  embeddings = model.encode(
74
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
 
44
  ```python
45
  # from transformers import AutoTokenizer
46
  model_name = "michaelfeil/ct2fast-all-MiniLM-L6-v2"
47
+ model_name_orig="sentence-transformers/all-MiniLM-L6-v2"
48
 
49
  from hf_hub_ctranslate2 import EncoderCT2fromHfHub
50
  model = EncoderCT2fromHfHub(
51
  # load in int8 on CUDA
52
  model_name_or_path=model_name,
53
  device="cuda",
54
+ compute_type="int8_float16"
55
  )
56
  outputs = model.generate(
57
  text=["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
58
  max_length=64,
59
+ ) # perform downstream tasks on outputs
 
60
  outputs["pooler_output"]
61
  outputs["last_hidden_state"]
62
  outputs["attention_mask"]
63
 
64
  # alternative, use SentenceTransformer Mix-In
65
  # for end-to-end Sentence embeddings generation
66
+ # (not pulling from this CT2fast-HF repo)
67
 
68
  from hf_hub_ctranslate2 import CT2SentenceTransformer
69
  model = CT2SentenceTransformer(
70
+ model_name_orig, compute_type="int8_float16", device="cuda"
71
  )
72
  embeddings = model.encode(
73
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],