michaelfeil commited on
Commit
0f11e04
1 Parent(s): 55f08ae

Upload setu4993/LaBSE ctranslate2 weights

Browse files
Files changed (2) hide show
  1. README.md +27 -7
  2. config.json +28 -5
README.md CHANGED
@@ -128,20 +128,36 @@ Speedup inference while reducing memory by 2x-4x using int8 inference in C++ on
128
 
129
  quantized version of [setu4993/LaBSE](https://huggingface.co/setu4993/LaBSE)
130
  ```bash
131
- pip install hf-hub-ctranslate2>=2.10.0 ctranslate2>=3.16.0
132
  ```
133
 
134
  ```python
135
  # from transformers import AutoTokenizer
136
  model_name = "michaelfeil/ct2fast-LaBSE"
 
137
 
138
  from hf_hub_ctranslate2 import EncoderCT2fromHfHub
139
  model = EncoderCT2fromHfHub(
140
  # load in int8 on CUDA
141
  model_name_or_path=model_name,
142
  device="cuda",
143
- compute_type="float16",
144
- # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  )
146
  embeddings = model.encode(
147
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
@@ -152,16 +168,20 @@ embeddings = model.encode(
152
  print(embeddings.shape, embeddings)
153
  scores = (embeddings @ embeddings.T) * 100
154
 
 
 
 
 
155
  ```
156
 
157
- Checkpoint compatible to [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
158
- and [hf-hub-ctranslate2>=2.10.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
159
  - `compute_type=int8_float16` for `device="cuda"`
160
  - `compute_type=int8` for `device="cpu"`
161
 
162
- Converted on 2023-06-16 using
163
  ```
164
- ct2-transformers-converter --model setu4993/LaBSE --output_dir ~/tmp-ct2fast-LaBSE --force --copy_files tokenizer.json README.md tokenizer_config.json vocab.txt special_tokens_map.json .gitattributes --quantization float16 --trust_remote_code
165
  ```
166
 
167
  # Licence and other remarks:
 
128
 
129
  quantized version of [setu4993/LaBSE](https://huggingface.co/setu4993/LaBSE)
130
  ```bash
131
+ pip install "hf-hub-ctranslate2>=2.12.0" "ctranslate2>=3.17.1"
132
  ```
133
 
134
  ```python
135
  # from transformers import AutoTokenizer
136
  model_name = "michaelfeil/ct2fast-LaBSE"
137
+ model_name_orig="setu4993/LaBSE"
138
 
139
  from hf_hub_ctranslate2 import EncoderCT2fromHfHub
140
  model = EncoderCT2fromHfHub(
141
  # load in int8 on CUDA
142
  model_name_or_path=model_name,
143
  device="cuda",
144
+ compute_type="int8_float16"
145
+ )
146
+ outputs = model.generate(
147
+ text=["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
148
+ max_length=64,
149
+ ) # perform downstream tasks on outputs
150
+ outputs["pooler_output"]
151
+ outputs["last_hidden_state"]
152
+ outputs["attention_mask"]
153
+
154
+ # Alternatively, use the SentenceTransformer Mix-In
155
+ # for end-to-end Sentence embeddings generation
156
+ # (not pulling from this CT2fast-HF repo)
157
+
158
+ from hf_hub_ctranslate2 import CT2SentenceTransformer
159
+ model = CT2SentenceTransformer(
160
+ model_name_orig, compute_type="int8_float16", device="cuda"
161
  )
162
  embeddings = model.encode(
163
  ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
 
168
  print(embeddings.shape, embeddings)
169
  scores = (embeddings @ embeddings.T) * 100
170
 
171
+ # Hint: you can also host this code via a REST API, e.g.
172
+ # via github.com/michaelfeil/infinity
173
+
174
+
175
  ```
176
 
177
+ Checkpoint compatible to [ctranslate2>=3.17.1](https://github.com/OpenNMT/CTranslate2)
178
+ and [hf-hub-ctranslate2>=2.12.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
179
  - `compute_type=int8_float16` for `device="cuda"`
180
  - `compute_type=int8` for `device="cpu"`
181
 
182
+ Converted on 2023-10-13 using
183
  ```
184
+ Llama-2 -> removed <pad> token.
185
  ```
186
 
187
  # Licence and other remarks:
config.json CHANGED
@@ -1,6 +1,29 @@
1
  {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "layer_norm_epsilon": 1e-12,
5
- "unk_token": "[UNK]"
6
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "torch_dtype": "float32",
21
+ "transformers_version": "4.29.2",
22
+ "type_vocab_size": 2,
23
+ "use_cache": true,
24
+ "vocab_size": 501153,
25
+ "bos_token": "<s>",
26
+ "eos_token": "</s>",
27
+ "layer_norm_epsilon": 1e-12,
28
+ "unk_token": "[UNK]"
29
+ }